blob: bec8b556449001e27b2e9540d43961501d462390 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000011import cookielib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000012import ctypes
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000013import functools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000015import httplib
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000016import itertools
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import json
18import logging
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000019import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000020import optparse
21import os
22import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000023import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000024import re
25import shutil
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000026import socket
27import ssl
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028import stat
29import subprocess
30import sys
31import tempfile
32import threading
33import time
34import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000035import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000036import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000037import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000039from third_party.rietveld import upload
40from third_party.depot_tools import fix_encoding
41
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000042from utils import lru
vadimsh@chromium.orgb074b162013-08-22 17:55:46 +000043from utils import threading_utils
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000044from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000045from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000046
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000047
48# Hack out upload logging.info()
49upload.logging = logging.getLogger('upload')
50# Mac pylint choke on this line.
51upload.logging.setLevel(logging.WARNING) # pylint: disable=E1103
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000052
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000053
vadimsh@chromium.org85071062013-08-21 23:37:45 +000054# Absolute path to this file (can be None if running from zip on Mac).
55THIS_FILE_PATH = os.path.abspath(__file__) if __file__ else None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000056
57# Directory that contains this file (might be inside zip package).
vadimsh@chromium.org85071062013-08-21 23:37:45 +000058BASE_DIR = os.path.dirname(THIS_FILE_PATH) if __file__ else None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000059
60# Directory that contains currently running script file.
61MAIN_DIR = os.path.dirname(os.path.abspath(zip_package.get_main_script_path()))
62
maruel@chromium.org6b365dc2012-10-18 19:17:56 +000063# Types of action accepted by link_file().
maruel@chromium.orgba6489b2013-07-11 20:23:33 +000064HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY = range(1, 5)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000065
66RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')
67
csharp@chromium.org8dc52542012-11-08 20:29:55 +000068# The file size to be used when we don't know the correct file size,
69# generally used for .isolated files.
70UNKNOWN_FILE_SIZE = None
71
csharp@chromium.orga92403f2012-11-20 15:13:59 +000072# The size of each chunk to read when downloading and unzipping files.
73ZIPPED_FILE_CHUNK = 16 * 1024
74
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000075# The name of the log file to use.
76RUN_ISOLATED_LOG_FILE = 'run_isolated.log'
77
csharp@chromium.orge217f302012-11-22 16:51:53 +000078# The name of the log to use for the run_test_cases.py command
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000079RUN_TEST_CASES_LOG = 'run_test_cases.log'
csharp@chromium.orge217f302012-11-22 16:51:53 +000080
csharp@chromium.org9c59ff12012-12-12 02:32:29 +000081# The delay (in seconds) to wait between logging statements when retrieving
82# the required files. This is intended to let the user (or buildbot) know that
83# the program is still running.
84DELAY_BETWEEN_UPDATES_IN_SECS = 30
85
vadimsh@chromium.org5db0f4f2013-07-04 13:57:02 +000086# Maximum expected delay (in seconds) between successive file fetches
87# in run_tha_test. If it takes longer than that, a deadlock might be happening
88# and all stack frames for all threads are dumped to log.
89DEADLOCK_TIMEOUT = 5 * 60
90
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000091# The name of the key to store the count of url attempts.
92COUNT_KEY = 'UrlOpenAttempt'
93
maruel@chromium.org2b2139a2013-04-30 20:14:58 +000094# Default maximum number of attempts to try opening a url before aborting.
95URL_OPEN_MAX_ATTEMPTS = 30
96# Default timeout when retrying.
97URL_OPEN_TIMEOUT = 6*60.
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000098
vadimsh@chromium.org80f73002013-07-12 14:52:44 +000099# Read timeout in seconds for downloads from isolate storage. If there's no
100# response from the server within this timeout whole download will be aborted.
101DOWNLOAD_READ_TIMEOUT = 60
102
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000103# Global (for now) map: server URL (http://example.com) -> HttpService instance.
104# Used by get_http_service to cache HttpService instances.
105_http_services = {}
106_http_services_lock = threading.Lock()
107
maruel@chromium.org9e9ceaa2013-04-05 15:42:42 +0000108# Used by get_flavor().
109FLAVOR_MAPPING = {
110 'cygwin': 'win',
111 'win32': 'win',
112 'darwin': 'mac',
113 'sunos5': 'solaris',
114 'freebsd7': 'freebsd',
115 'freebsd8': 'freebsd',
116}
117
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000118
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be loaded or is not valid."""
122
123
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
127
128
class TimeoutError(IOError):
  """Signals that reading an HTTP response timed out.

  The exception that caused the timeout, if any, is kept in |inner_exc|. Its
  text is used as the message; 'Timeout' is used when there is none.
  """

  def __init__(self, inner_exc=None):
    message = str(inner_exc) if inner_exc else 'Timeout'
    super(TimeoutError, self).__init__(message)
    self.inner_exc = inner_exc
135
136
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and all its dependencies.

  When |executable| is True, run_isolated.py is stored as __main__.py so that
  the zip package is directly executable by python.
  """
  # Creating a zip package while already running from another zip package is
  # unsupported and probably unneeded.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  archive_name = '__main__.py' if executable else None
  package = zip_package.ZipPackage(root=BASE_DIR)
  package.add_python_file(THIS_FILE_PATH, archive_name)
  for subdir in ('third_party', 'utils'):
    package.add_directory(os.path.join(BASE_DIR, subdir))
  return package
153
154
def get_flavor():
  """Returns the flavor matching sys.platform, 'linux' when it is unknown.

  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in FLAVOR_MAPPING:
    return FLAVOR_MAPPING[platform]
  return 'linux'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000158
159
def os_link(source, link_name):
  """Creates the hardlink |link_name| pointing to |source|.

  Calls CreateHardLinkW through ctypes on Windows and os.link() elsewhere.
  Raises OSError when the Windows call reports a failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
168
169
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy2(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | everyone_can_read)
176
177
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; the kind of link depends on |action|.

  |action| is one of HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY.

  Raises ValueError for an unknown action and MappingError when |infile| is
  missing, when |outfile| already exists, or when a HARDLINK cannot be created.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  known_actions = (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY)
  if action not in known_actions:
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested, or this is a symlink request on Windows,
  # where symlinks are converted to hardlinks and fail over to a copy.
  try:
    os_link(infile, outfile)
    return
  except OSError as e:
    if action == HARDLINK:
      raise MappingError(
          'Failed to hardlink %s to %s: %s' % (infile, outfile, e))

  # Probably a different file system.
  logging.warning(
      'Failed to hardlink, failing back to copy %s to %s' % (
        infile, outfile))
  readable_copy(outfile, infile)
207
208
def _set_write_bit(path, read_only):
  """Sets or clears the write bit on a file or directory.

  Arguments:
    path: entry to modify. It is inspected with os.lstat(), so a symlink is
          examined itself rather than the file it points to.
    read_only: when True the permissions are reduced to the owner's read and
               execute bits (mode & 0500); otherwise the owner's write bit is
               added (mode | 0200).

  A symlink is only modified when os.lchmod() is available. Without it the
  symlink is skipped, so that the file it points to is not changed.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The file type has to be read from the untouched mode: masking with 0500
  # drops the type bits, which would make a symlink look like a regular file
  # and let os.chmod() below follow it.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
226
227
def make_writable(root, read_only):
  """Sets or clears the write bit on every file and directory below |root|.

  |root| must be an absolute path. Only the entries found while walking the
  tree are modified; |root| itself is not passed to _set_write_bit().
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files are handled before the sub-directories of the same directory.
    for entry in itertools.chain(filenames, dirnames):
      _set_write_bit(os.path.join(dirpath, entry), read_only)
237
238
def rmtree(root):
  """Deletes the tree at |root|, retrying automatically on Windows.

  The tree is made writable first. On Windows the deletion is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each WindowsError; nothing is
  raised if all three attempts fail.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = 2 * (attempt + 1)
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
254
255
def is_same_filesystem(path1, path2):
  """Tells whether both paths live on the same filesystem.

  Hardlinks can only be used when this is the case. Both paths must be
  absolute.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_pattern = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_pattern, path1), path1
    assert re.match(drive_pattern, path2), path2
    drives_differ = path1[0].lower() != path2[0].lower()
    if drives_differ:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
272
273
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|.

  Uses GetDiskFreeSpaceExW on Windows and os.statvfs() elsewhere.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
284
285
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary location is used when it is on the same file system as
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
292
293
def load_isolated(content, os_flavor=None):
  """Parses the content of a .isolated file and verifies that it is valid.

  Arguments:
    content: raw json text of the .isolated file.
    os_flavor: value the 'os' entry has to match; get_flavor() is used when it
               is not specified.

  Returns:
    The decoded json data, which is always a dict.

  Raises:
    ConfigError: if |content| is not valid json, is not a dict, contains an
                 unknown key or contains a value of an unexpected type.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # The type is checked first: re.match() raises TypeError on a
            # non-string, which would escape as something else than ConfigError.
            if (not isinstance(subsubvalue, basestring) or
                not RE_IS_SHA1.match(subsubvalue)):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            # json decodes sizes above sys.maxint as long, e.g. files of 2GB or
            # more on a 32 bits python.
            if not isinstance(subsubvalue, (int, long)):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # Same as for 'h': never hand a non-string to the regexp.
        if (not isinstance(subvalue, basestring) or
            not RE_IS_SHA1.match(subvalue)):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      expected_value = os_flavor or get_flavor()
      if value != expected_value:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_value, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
372
373
def url_open(url, **kwargs):
  """Attempts to open the given url multiple times.

  The url is split into its server part and its request part; the request is
  sent through the HttpService instance for that server, with |kwargs| passed
  through to its request() method.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  Returns HttpResponse object, where the response may be read from, or None
  if it was unable to connect.
  """
  urlhost, urlpath = split_server_request_url(url)
  return get_http_service(urlhost).request(urlpath, **kwargs)
389
390
def url_read(url, **kwargs):
  """Opens the given url, retrying as url_open() does, and reads all its data.

  Accepts the same arguments as url_open().

  Returns all the data read, or None if it was unable to connect or if reading
  the data timed out.
  """
  response = url_open(url, **kwargs)
  if response:
    try:
      return response.read()
    except TimeoutError:
      pass
  return None
405
406
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000407def split_server_request_url(url):
408 """Splits the url into scheme+netloc and path+params+query+fragment."""
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000409 url_parts = list(urlparse.urlparse(url))
maruel@chromium.org000bb4d2013-04-26 17:53:27 +0000410 urlhost = '%s://%s' % (url_parts[0], url_parts[1])
411 urlpath = urlparse.urlunparse(['', ''] + url_parts[2:])
412 return urlhost, urlpath
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000413
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000414
def get_http_service(urlhost):
  """Returns the HttpService instance that sends requests to |urlhost|.

  The instance is created on first use and cached in _http_services under the
  lower-cased |urlhost| stripped of its trailing slashes.
  """
  # Ensure consistency.
  key = str(urlhost).lower().rstrip('/')
  with _http_services_lock:
    if not _http_services.get(key):
      _http_services[key] = AppEngineService(key)
    return _http_services[key]
427
428
class HttpService(object):
  """Base class for a class that provides an API to HTTP based service:
    - Provides 'request' method.
    - Supports automatic request retries.
    - Supports persistent cookies.
    - Thread safe.
  """

  # File to use to store all auth cookies.
  COOKIE_FILE = os.path.join(os.path.expanduser('~'), '.isolated_cookies')

  # CookieJar reused by all services + lock that protects its instantiation.
  _cookie_jar = None
  _cookie_jar_lock = threading.Lock()

  def __init__(self, urlhost):
    """|urlhost| is the scheme+netloc every request is sent to."""
    self.urlhost = urlhost
    self.cookie_jar = self.load_cookie_jar()
    self.opener = self.create_url_opener()

  def authenticate(self):  # pylint: disable=R0201
    """Called when HTTP server asks client to authenticate.

    Can be implemented in subclasses. Returns True if the request should be
    tried again; this base implementation never authenticates.
    """
    return False

  @staticmethod
  def load_cookie_jar():
    """Returns global CookieJar object that stores cookies in the file.

    The jar is loaded from COOKIE_FILE on the first call and shared afterward.
    """
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        return HttpService._cookie_jar
      jar = ThreadSafeCookieJar(HttpService.COOKIE_FILE)
      jar.load()
      HttpService._cookie_jar = jar
      return jar

  @staticmethod
  def save_cookie_jar():
    """Called when cookie jar needs to be flushed to disk."""
    with HttpService._cookie_jar_lock:
      if HttpService._cookie_jar is not None:
        HttpService._cookie_jar.save()

  def create_url_opener(self):  # pylint: disable=R0201
    """Returns OpenerDirector that will be used when sending requests.

    Can be reimplemented in subclasses.
    """
    return urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie_jar))

  def request(self, urlpath, data=None, content_type=None, **kwargs):
    """Attempts to open the given url multiple times.

    |urlpath| is relative to the server root, i.e. '/some/request?param=1'.

    |data| can be either:
      -None for a GET request
      -str for pre-encoded data
      -list for data to be encoded
      -dict for data to be encoded (COUNT_KEY will be added in this case)

    |content_type| can only be used along with |data|, i.e. on a POST.

    |kwargs| are passed as-is to _retry_loop().

    Returns a file-like object, where the response may be read from, or None
    if it was unable to connect.
    """
    assert urlpath and urlpath[0] == '/'

    if isinstance(data, dict) and COUNT_KEY in data:
      logging.error('%s already existed in the data passed into UlrOpen. It '
                    'would be overwritten. Aborting UrlOpen', COUNT_KEY)
      return None

    method = 'GET' if data is None else 'POST'
    assert not ((method != 'POST') and content_type), (
        'Can\'t use content_type on GET')

    def make_request(extra):
      """Returns a urllib2.Request instance for this specific retry."""
      if isinstance(data, str) or data is None:
        payload = data
      else:
        # Work on a copy so |data| is left untouched between retries.
        if isinstance(data, dict):
          payload = data.items()
        else:
          payload = data[:]
        payload.extend(extra.iteritems())
        payload = urllib.urlencode(payload)
      new_url = urlparse.urljoin(self.urlhost, urlpath[1:])
      if isinstance(data, str) or data is None:
        # In these cases, add the extra parameter to the query part of the url.
        url_parts = list(urlparse.urlparse(new_url))
        # Append the query parameter.
        if url_parts[4] and extra:
          url_parts[4] += '&'
        url_parts[4] += urllib.urlencode(extra)
        new_url = urlparse.urlunparse(url_parts)
      request = urllib2.Request(new_url, data=payload)
      if payload is not None:
        if content_type:
          request.add_header('Content-Type', content_type)
        request.add_header('Content-Length', len(payload))
      return request

    return self._retry_loop(make_request, **kwargs)

  def _retry_loop(
      self,
      make_request,
      max_attempts=URL_OPEN_MAX_ATTEMPTS,
      retry_404=False,
      retry_50x=True,
      timeout=URL_OPEN_TIMEOUT,
      read_timeout=None):
    """Runs internal request-retry loop.

    - Optionally retries HTTP 404 and 50x.
    - Retries up to |max_attempts| times. If None or 0, there's no limit in the
      number of retries.
    - Retries up to |timeout| duration in seconds. If None or 0, there's no
      limit in the time taken to do retries.
    - If both |max_attempts| and |timeout| are None or 0, this functions retries
      indefinitely.
    - Sleeps between two attempts, but not after the last allowed one.

    If |read_timeout| is not None will configure underlying socket to
    raise TimeoutError exception whenever there's no response from the server
    for more than |read_timeout| seconds. It can happen during any read
    operation so once you pass non-None |read_timeout| be prepared to handle
    these exceptions in subsequent reads from the stream.

    Returns a HttpResponse on success, None otherwise.
    """
    authenticated = False
    last_error = None
    # Both are only used to report the failure once the loop is over.
    request = None
    attempts_made = 0
    start = self._now()
    for attempt in itertools.count():
      if max_attempts and attempt >= max_attempts:
        # Too many attempts.
        break
      if timeout and (self._now() - start) >= timeout:
        # Retried for too long.
        break
      extra = {COUNT_KEY: attempt} if attempt else {}
      request = make_request(extra)
      attempts_made = attempt + 1
      try:
        url_response = self._url_open(request, timeout=read_timeout)
        logging.debug('url_open(%s) succeeded', request.get_full_url())
        return HttpResponse(url_response, request.get_full_url())
      except urllib2.HTTPError as e:
        # Unauthorized. Ask to authenticate and then try again.
        if e.code in (401, 403):
          # Try to authenticate only once. If it doesn't help, then server does
          # not support app engine authentication.
          logging.error(
              'Authentication is required for %s on attempt %d.\n%s',
              request.get_full_url(), attempt,
              self._format_exception(e, verbose=True))
          if not authenticated and self.authenticate():
            authenticated = True
            # Do not sleep.
            continue
          # If authentication failed, return.
          logging.error(
              'Unable to authenticate to %s.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        if ((e.code < 500 and not (retry_404 and e.code == 404)) or
            (e.code >= 500 and not retry_50x)):
          # This HTTPError means we reached the server and there was a problem
          # with the request, so don't retry.
          logging.error(
              'Able to connect to %s but an exception was thrown.\n%s',
              request.get_full_url(), self._format_exception(e, verbose=True))
          return None

        # The HTTPError was due to a server error, so retry the attempt.
        logging.warning('Able to connect to %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      except (urllib2.URLError, httplib.HTTPException,
              socket.timeout, ssl.SSLError) as e:
        logging.warning('Unable to open url %s on attempt %d.\n%s',
                        request.get_full_url(), attempt,
                        self._format_exception(e))
        last_error = e

      # Only sleep if we are going to try again: always when the number of
      # attempts is unlimited, otherwise only if this was not the last one.
      if not max_attempts or attempt + 1 < max_attempts:
        remaining = None
        if timeout:
          remaining = timeout - (self._now() - start)
          if remaining <= 0:
            break
        self.sleep_before_retry(attempt, remaining)

    # |request| is still None if the loop ended before sending anything.
    failed_url = request.get_full_url() if request is not None else None
    logging.error('Unable to open given url, %s, after %d attempts.\n%s',
                  failed_url, attempts_made,
                  self._format_exception(last_error, verbose=True))
    return None

  def _url_open(self, request, timeout=None):
    """Low level method to execute urllib2.Request's.

    To be mocked in tests.
    """
    if timeout is not None:
      return self.opener.open(request, timeout=timeout)
    else:
      # Leave original default value for |timeout|. It's nontrivial.
      return self.opener.open(request)

  @staticmethod
  def _now():
    """To be mocked in tests."""
    return time.time()

  @staticmethod
  def calculate_sleep_before_retry(attempt, max_duration):
    """Returns how long to sleep, in seconds, after failed attempt |attempt|.

    The duration grows exponentially with |attempt|, includes a random part and
    is capped to 10 seconds, and to |max_duration| when it is specified.
    """
    # Maximum sleeping time. We're hammering a cloud-distributed service, it'll
    # survive.
    MAX_SLEEP = 10.
    # random.random() returns [0.0, 1.0). Starts with relatively short waiting
    # time by starting with 1.5/2+1.5^-1 median offset.
    duration = (random.random() * 1.5) + math.pow(1.5, (attempt - 1))
    assert duration > 0.1
    duration = min(MAX_SLEEP, duration)
    if max_duration:
      duration = min(max_duration, duration)
    return duration

  @classmethod
  def sleep_before_retry(cls, attempt, max_duration):
    """Sleeps for some amount of time when retrying the request.

    To be mocked in tests.
    """
    time.sleep(cls.calculate_sleep_before_retry(attempt, max_duration))

  @staticmethod
  def _format_exception(exc, verbose=False):
    """Given an instance of some exception raised by urlopen returns human
    readable piece of text with detailed information about the error.

    When |verbose| is True and |exc| is a urllib2.HTTPError, the response
    headers not starting with 'x-' and the response body are included.
    """
    out = ['Exception: %s' % (exc,)]
    if verbose:
      if isinstance(exc, urllib2.HTTPError):
        out.append('-' * 10)
        if exc.hdrs:
          for header, value in exc.hdrs.items():
            if not header.startswith('x-'):
              out.append('%s: %s' % (header.capitalize(), value))
          out.append('')
        out.append(exc.read() or '<empty body>')
        out.append('-' * 10)
    return '\n'.join(out)
683
maruel@chromium.orgef333122013-03-12 20:36:40 +0000684
class HttpResponse(object):
  """Wraps the response object handed out by HttpService.

  Keeps track of how many bytes were read so far so that read failures can be
  logged with some context.
  """

  def __init__(self, url_response, url):
    self._url = url
    self._url_response = url_response
    # Number of bytes returned by read() so far.
    self._read = 0

  @property
  def content_length(self):
    """Value of the 'Content-Length' header as an int, or None if absent."""
    raw = self._url_response.headers.get('Content-Length')
    if raw is None:
      return None
    return int(raw)

  def read(self, size=None):
    """Returns up to |size| bytes read from the underlying response.

    |size| is passed as is to the underlying read(); None asks for all
    available bytes.

    Raises TimeoutError when the underlying read raises socket.timeout or
    ssl.SSLError.
    """
    try:
      chunk = self._url_response.read(size)
      self._read += len(chunk)
      return chunk
    except (socket.timeout, ssl.SSLError) as e:
      logging.error(
          'Timeout while reading from %s, read %d of %s: %s',
          self._url, self._read, self.content_length, e)
      raise TimeoutError(e)
714
715
class AppEngineService(HttpService):
  """HttpService with authentication support for app engine based services."""

  # Held while authenticating so the user won't be confused by multiple
  # concurrent login prompts.
  _auth_lock = threading.Lock()

  def __init__(self, urlhost, email=None, password=None):
    super(AppEngineService, self).__init__(urlhost)
    self._keyring = None
    self.email = email
    self.password = password

  def authenticate(self):
    """Logs into the app engine application.

    Returns True on success. Returns False right away, after logging an
    error, when the 'upload' module is not available.
    """
    if not upload:
      logging.error('\'upload\' module is missing, '
                    'app engine authentication is disabled.')
      return False

    # Captured by the local class below, which has its own |self|.
    jar = self.cookie_jar
    persist_cookies = self.save_cookie_jar

    class AuthServer(upload.AbstractRpcServer):
      """RPC server that stores its cookies in this service's cookie jar."""

      def _GetOpener(self):
        # Authentication code needs to know about 302 response, so the
        # OpenerDirector is assembled by hand without HTTPRedirectHandler.
        director = urllib2.OpenerDirector()
        for handler in (
            urllib2.ProxyHandler(),
            urllib2.UnknownHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPDefaultErrorHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPErrorProcessor(),
            urllib2.HTTPCookieProcessor(jar)):
          director.add_handler(handler)
        return director

      def PerformAuthentication(self):
        self._Authenticate()
        persist_cookies()
        return self.authenticated

    with AppEngineService._auth_lock:
      server = AuthServer(self.urlhost, self.get_credentials)
      return server.PerformAuthentication()

  def get_credentials(self):
    """Returns the (email, password) tuple to authenticate with.

    Called during the authentication process, possibly multiple times if
    authentication fails. The email and password given to the constructor are
    used when both are set; otherwise the credentials come from
    upload.KeyringCreds, which is created once and then reused.
    """
    # 'authenticate' calls this only if 'upload' is present. Make any other
    # caller fail non-cryptically if 'upload' is missing.
    assert upload, '\'upload\' module is required for this to work'
    if not (self.email and self.password):
      if not self._keyring:
        self._keyring = upload.KeyringCreds(
            self.urlhost, self.urlhost, self.email)
      return self._keyring.GetUserCredentials()
    return (self.email, self.password)
778
779
780class ThreadSafeCookieJar(cookielib.MozillaCookieJar):
781 """MozillaCookieJar with thread safe load and save."""
782
783 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
784 """Loads cookies from the file if it exists."""
maruel@chromium.org4e2676d2013-06-06 18:39:48 +0000785 filename = os.path.expanduser(filename or self.filename)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000786 with self._cookies_lock:
787 if os.path.exists(filename):
788 try:
789 cookielib.MozillaCookieJar.load(self, filename,
790 ignore_discard,
791 ignore_expires)
792 logging.debug('Loaded cookies from %s', filename)
793 except (cookielib.LoadError, IOError):
794 pass
795 else:
maruel@chromium.org16452a32013-04-05 00:18:44 +0000796 try:
797 fd = os.open(filename, os.O_CREAT, 0600)
798 os.close(fd)
799 except OSError:
800 logging.error('Failed to create %s', filename)
801 try:
802 os.chmod(filename, 0600)
803 except OSError:
804 logging.error('Failed to fix mode for %s', filename)
vadimsh@chromium.org87d63262013-04-04 19:34:21 +0000805
806 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
807 """Saves cookies to the file, completely overwriting it."""
808 logging.debug('Saving cookies to %s', filename or self.filename)
809 with self._cookies_lock:
maruel@chromium.org16452a32013-04-05 00:18:44 +0000810 try:
811 cookielib.MozillaCookieJar.save(self, filename,
812 ignore_discard,
813 ignore_expires)
814 except OSError:
815 logging.error('Failed to save %s', filename)
csharp@chromium.orgf13eec02013-03-11 18:22:56 +0000816
817
def valid_file(filepath, size):
  """Returns True if the file at |filepath| looks valid.

  Currently only the size is checked: the file is valid when |size| is
  UNKNOWN_FILE_SIZE or equals the size reported by os.stat(). A mismatch is
  logged as a warning.
  """
  if size != UNKNOWN_FILE_SIZE:
    actual_size = os.stat(filepath).st_size
    if actual_size != size:
      logging.warning(
          'Found invalid item %s; %d != %d',
          os.path.basename(filepath), actual_size, size)
      return False
  return True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000830
831
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server.

  The actual transfer function is selected by get_file_handler() and is called
  with two inputs: the item and its destination path.

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The low 8 bits are reserved for the internal retry counter.
  LOW = 1 << 8
  MED = 2 << 8
  HIGH = 3 << 8
  INTERNAL_PRIORITY_BITS = (1 << 8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = threading_utils.ThreadPool(
        self.INITIAL_WORKERS, self.MAX_WORKERS, 0, 'remote')

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def close(self):
    """Terminates all worker threads."""
    self._pool.close()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must not use any of
    the bits reserved by INTERNAL_PRIORITY_BITS.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Queues the transfer of |obj| to |dest| on the thread pool."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns one result from the thread pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success. On IOError the partial file is removed and the
    item is queued again with a priority lowered by one, in which case None is
    returned; once RETRIES retries were done the IOError is re-raised.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        actual_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, actual_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Remove unfinished download.
      if os.path.exists(dest):
        os.remove(dest)
      # The low bits of the priority count the retries done so far.
      retries_done = priority & self.INTERNAL_PRIORITY_BITS
      if retries_done >= self.RETRIES:
        raise
      # Retry, lowering the priority.
      self._add_item(priority + 1, obj, dest, size)
      return

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a callable(item, dest) that retrieves objects from a remote.

    http:// and https:// locations are downloaded, anything else is treated
    as a path to copy from.
    """
    is_url = re.match(r'^https?://.+$', file_or_url)
    handler = self._download_file if is_url else self._copy_file
    return functools.partial(handler, file_or_url)

  @staticmethod
  def _download_file(base_url, item, dest):
    """Downloads the zlib compressed |item| and writes it inflated to |dest|.

    Every failure is logged and reported to the caller as an IOError.
    """
    # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
    # easy.
    try:
      zipped_source = base_url + item
      logging.debug('download_file(%s)', zipped_source)

      # Because the app engine DB is only eventually consistent, retry
      # 404 errors because the file might just not be visible yet (even
      # though it has been uploaded).
      connection = url_open(zipped_source, retry_404=True,
                            read_timeout=DOWNLOAD_READ_TIMEOUT)
      if not connection:
        raise IOError('Unable to open connection to %s' % zipped_source)

      expected_bytes = connection.content_length
      inflater = zlib.decompressobj()
      received_bytes = 0
      with open(dest, 'wb') as out:
        chunk = connection.read(ZIPPED_FILE_CHUNK)
        while chunk:
          received_bytes += len(chunk)
          out.write(inflater.decompress(chunk))
          chunk = connection.read(ZIPPED_FILE_CHUNK)
      # Ensure that all the data was properly decompressed.
      leftover = inflater.flush()
      assert not leftover
    except IOError as e:
      logging.error('Failed to download %s at %s.\n%s', item, dest, e)
      raise
    except httplib.HTTPException as e:
      msg = 'HTTPException while retrieving %s at %s.\n%s' % (item, dest, e)
      logging.error(msg)
      raise IOError(msg)
    except zlib.error as e:
      msg = 'Corrupted zlib for item %s. Processed %d of %s bytes.\n%s' % (
          item, received_bytes, expected_bytes, e)
      logging.error(msg)

      # Testing seems to show that if a few machines are trying to download
      # the same blob, they can cause each other to fail. So if we hit a
      # zip error, this is the most likely cause (it only downloads some of
      # the data). Randomly sleep for between 5 and 25 seconds to try and
      # spread out the downloads.
      # TODO(csharp): Switch from blobstorage to cloud storage and see if
      # that solves the issue.
      time.sleep((random.random() * 20) + 5)

      raise IOError(msg)

  @staticmethod
  def _copy_file(base_path, item, dest):
    """Copies |item| found under |base_path| to |dest|."""
    source = os.path.join(base_path, item)
    if source != dest:
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
      return
    logging.info('Source and destination are the same, no action required')
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000977
978
class CachePolicies(object):
  """Plain holder for the retention limits of a cache."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value.
                      If 0, the disk gets filled unconditionally.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
993
994
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.remote = remote
    self.target_directory = target_directory

  def retrieve(self, priority, item, size):
    """Requests |item| from the remote and waits for one result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it."""
    item = next(items.iterkeys())
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
1023
1024
class Cache(object):
  """Stateful LRU cache.

  Files live directly in |cache_dir| and are tracked, together with their
  size, in an lru.LRUDict.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    self.lru = lru.LRUDict()

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values: sizes of the files added and removed, and the free
    # disk space measured by the last trim().
    self._added = []
    self._removed = []
    self._free_disk = 0

    with tools.Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)

      # Load state of the cache.
      if os.path.isfile(self.state_file):
        try:
          self.lru = lru.LRUDict.load(self.state_file)
        except ValueError as err:
          logging.error('Failed to load cache state: %s' % (err,))
          # Don't want to keep broken state file.
          os.remove(self.state_file)

      # Ensure that all files listed in the state still exist and add new ones.
      # Whatever is left in |previous| after the scan is listed in the state
      # but missing on disk.
      previous = self.lru.keys_set()
      unknown = []
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # File that's not referenced in 'state.json'.
        # TODO(vadimsh): Verify its SHA1 matches file name.
        logging.warning('Adding unknown file %s to cache', filename)
        unknown.append(filename)

      if unknown:
        # Add as oldest files. They will be deleted eventually if not accessed.
        self._add_oldest_list(unknown)
        logging.warning('Added back %d unknown files', len(unknown))

      if previous:
        # Filter out entries that were not found.
        logging.warning('Removed %d lost files', len(previous))
        for filename in previous:
          self.lru.pop(filename)
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache one last time and logs the profiling values."""
    with tools.Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.lru),
        sum(self.lru.itervalues()) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_lru_file(self):
    """Removes the least recently used file and returns its size."""
    item, size = self.lru.pop_oldest()
    self._delete_file(item, size)
    return size

  def trim(self):
    """Evicts the oldest files until every policy is satisfied, then saves.

    The policies are enforced in this order: maximum cache size, maximum
    number of items, minimum free disk space. A limit set to 0 is not
    enforced.
    """
    # Ensure maximum cache size.
    if self.policies.max_cache_size:
      total_size = sum(self.lru.itervalues())
      while total_size > self.policies.max_cache_size:
        total_size -= self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and len(self.lru) > self.policies.max_items:
      for _ in xrange(len(self.lru) - self.policies.max_items):
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    trimmed_due_to_space = False
    while (
        self.policies.min_free_space and
        self.lru and
        self._free_disk < self.policies.min_free_space):
      trimmed_due_to_space = True
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)
    if trimmed_due_to_space:
      # The loop above may have evicted every single file, so |total| can be
      # 0 here; the percentage is computed by a helper that tolerates that.
      total = sum(self.lru.itervalues())
      logging.warning(
          'Trimmed due to not enough free disk space: %.1fkb free, %.1fkb '
          'cache (%.1f%% of its maximum capacity)',
          self._free_disk / 1024.,
          total / 1024.,
          self._usage_percent(total, self.policies.max_cache_size),
          )
    self.save()

  @staticmethod
  def _usage_percent(total, max_cache_size):
    """Returns |total| as a percentage of |max_cache_size|.

    Returns 0. when |max_cache_size| is 0, i.e. when no maximum is enforced
    and there is no capacity to compare against.
    """
    if not max_cache_size:
      return 0.
    return 100. * total / float(max_cache_size)

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.

    The download is only queued here; wait_for() collects the results.
    """
    assert '/' not in item
    path = self.path(item)
    found = False

    if item in self.lru:
      if not valid_file(path, size):
        self.lru.pop(item)
        self._delete_file(item, size)
      else:
        # Was already in cache. Update its LRU value by putting it at the end.
        self.lru.touch(item)
        found = True

    if not found:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      # TODO(maruel): It should look at the free disk space, the current cache
      # size and the size of the new item on every new item:
      # - Trim the cache as more entries are listed when free disk space is low,
      #   otherwise if the amount of data downloaded during the run > free disk
      #   space, it'll crash.
      # - Make sure there's enough free disk space to fit all dependencies of
      #   this run! If not, abort early.
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if obj not in self.lru:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    self.lru.save(self.state_file)

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved. Every result collected while waiting is
    added to the cache, whether it is one of |items| or not.
    """
    # Flush items already present.
    for item in items:
      if item in self.lru:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item)
      if item in items:
        return item

  def _add(self, item):
    """Adds an item into LRU cache marking it as a newest one."""
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self.lru.add(item, size)

  def _add_oldest_list(self, items):
    """Adds a bunch of items into LRU cache marking them as oldest ones."""
    pairs = []
    for item in items:
      size = os.stat(self.path(item)).st_size
      self._added.append(size)
      pairs.append((item, size))
    self.lru.batch_insert_oldest(pairs)

  def _delete_file(self, item, size):
    """Deletes cache file from the file system.

    |size| is recorded in the profiling values. A failure to delete the file
    is logged and otherwise ignored.
    """
    self._removed.append(size)
    try:
      os.remove(self.path(item))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001248
1249
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Verifies the .isolated file is valid and loads this object with the json
    data.

    One child IsolatedFile is created per entry of the 'includes' key. Must be
    called only once per instance.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(sha1) for sha1 in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Does nothing when this file is not parsed yet or when its files were
    already fetched.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s', filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1304
1305
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    # Command to run; taken from the first processed node that defines one.
    self.command = []
    # Relative file path -> properties, for every file to be mapped.
    self.files = {}
    # Both stay None until a node provides a value; load() replaces a value
    # that is still None with its default.
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises ConfigError if the same .isolated hash is requested twice.
    """
    self.root = IsolatedFile(root_isolated_hash)

    # Isolated files being retrieved now: hash -> IsolatedFile instance.
    pending = {}
    # Set of hashes of already retrieved items to refuse recursive includes.
    seen = set()

    def retrieve(isolated_file):
      """Schedules the download of one .isolated file at high priority."""
      h = isolated_file.obj_hash
      if h in seen:
        raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
      assert h not in pending
      seen.add(h)
      pending[h] = isolated_file
      cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

    retrieve(self.root)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file explicitly rather than leaving it to the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        retrieve(new_child)

      # Traverse the whole tree to see if files can now be fetched.
      self._traverse_tree(cache, self.root)

    def check(n):
      """Returns True if |n| and all its descendants had their files fetched."""
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)

    # Apply the defaults for properties that no .isolated file specified.
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False

  def _traverse_tree(self, cache, node):
    """Fetches the files of |node| and of its children that may be fetched.

    Nothing is done for a node whose can_fetch is not set. At most one child
    is newly marked as fetchable per call.
    """
    if node.can_fetch:
      if not node.files_fetched:
        self._update_self(cache, node)
      will_break = False
      # NOTE(review): only a child newly marked fetchable during this pass
      # stops the loop; a child marked on an earlier pass does not, even if
      # its files were not fetched yet. Confirm this matches the intended
      # left-to-right priority.
      for i in node.children:
        if not i.can_fetch:
          if will_break:
            break
          # Automatically mark the first one as fetcheable.
          i.can_fetch = True
          will_break = True
        self._traverse_tree(cache, i)

  def _update_self(self, cache, node):
    """Merges the files and properties of |node| into this object.

    A property already set by a previously processed node is not overwritten.
    """
    node.fetch_files(cache, self.files)
    # Grabs properties.
    if not self.command and node.data.get('command'):
      self.command = node.data['command']
    if self.read_only is None and node.data.get('read_only') is not None:
      self.read_only = node.data['read_only']
    if (self.relative_cwd is None and
        node.data.get('relative_cwd') is not None):
      self.relative_cwd = node.data['relative_cwd']
1397
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001398
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory the given files live in.

  |files| is a sized iterable of relative file paths. The parent directory of
  each file and all of its ancestors are created with os.mkdir(), parents
  first.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect each file's directory together with all of its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a parent is created before any of its children.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
1411
1412
def create_links(base_directory, files):
  """Creates the symlinks described by |files| under |base_directory|.

  |files| yields (filepath, properties) pairs. Only entries with an 'l'
  property, the link target, are processed; every other entry is ignored.
  On Windows the symlinks are skipped with a warning.
  """
  on_windows = sys.platform == 'win32'
  # os.lchmod is only looked up dynamically; it is skipped when missing.
  set_link_mode = getattr(os, 'lchmod', None)
  for filepath, properties in files:
    if 'l' in properties:
      if on_windows:
        # TODO(maruel): Create junctions or empty text files similar to what
        # cygwin do?
        logging.warning('Ignoring symlink %s', filepath)
      else:
        link_path = os.path.join(base_directory, filepath)
        # symlink doesn't exist on Windows. So the 'link' property should
        # never be specified for windows .isolated file.
        os.symlink(properties['l'], link_path)  # pylint: disable=E1101
        if set_link_mode and 'm' in properties:
          set_link_mode(link_path, properties['m'])
1431
1432
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command used to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if it is missing. The first item of |cmd| is modified in place to use the
  native path separator, then the command goes through
  tools.fix_python_path().

  Returns a (working directory, command) tuple.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  working_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(working_dir):
    os.makedirs(working_dir)

  # Ensure paths are correctly separated on windows.
  cmd[0] = cmd[0].replace('/', os.path.sep)
  return working_dir, tools.fix_python_path(cmd)
1447
1448
def generate_remaining_files(files):
  """Groups the files that have content to download by their hash.

  |files| yields (filepath, properties) pairs. Entries without an 'h'
  property are left out.

  Returns a dict mapping each hash to the list of (filepath, properties)
  pairs that use it, in iteration order.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest in by_hash:
      by_hash[digest].append((filepath, props))
    else:
      by_hash[digest] = [(filepath, props)]
  return by_hash
1457
1458
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to |target_directory| without running anything.

  Returns 1 if the loaded .isolated files define no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with tools.Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with tools.Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      destinations = remaining.pop(obj)
      last_index = len(destinations) - 1

      for index, (filepath, properties) in enumerate(destinations):
        outfile = os.path.join(target_directory, filepath)
        source = no_cache.path(obj)
        logging.info(source)

        # The downloaded item is copied to every destination but the last
        # one, where it is moved instead.
        if index < last_index:
          shutil.copyfile(source, outfile)
        else:
          os.rename(source, outfile)

        if sys.platform != 'win32' and 'm' in properties:
          # It's not set on Windows. It could be set only in the case of
          # downloading content generated from another OS. Do not crash in that
          # case.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        msg = '%d files remaining...' % len(remaining)
        print(msg)
        logging.info(msg)
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1518
1519
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the hash of a .isolated file or, when it does not
  match RE_IS_SHA1, the path of a local .isolated file, which is then added to
  the cache under its SHA-1.

  Returns the exit code of the command, or 1 if there is no command to run.
  The temporary directory is deleted in all cases. An OSError raised while
  starting the command is reported on stderr and re-raised.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with tools.Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest. The file is closed explicitly once it is hashed.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with tools.Profiler('GetRest'):
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        with threading_utils.DeadlockDetector(DEADLOCK_TIMEOUT) as detector:
          while remaining:
            # Signal progress each time the loop goes around.
            detector.ping()
            obj = cache.wait_for(remaining)
            for filepath, properties in remaining.pop(obj):
              outfile = os.path.join(outdir, filepath)
              link_file(outfile, cache.path(obj), HARDLINK)
              if 'm' in properties:
                # It's not set on Windows.
                os.chmod(outfile, properties['m'])

            if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
              msg = '%d files remaining...' % len(remaining)
              print(msg)
              logging.info(msg)
              last_update = time.time()

      if settings.read_only:
        logging.info('Making files read only')
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # TODO(vadimsh): Pass it via 'env_vars' in manifest.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
                     os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
      try:
        with tools.Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1591
1592
def main():
  """Parses the command line, then either downloads or runs the .isolated.

  Exactly one of --isolated and --hash must be given and no positional
  argument is accepted. With --download the files are only downloaded,
  otherwise the test is run out of the cache.

  Returns the exit code of the process; 1 if running the test raised.
  """
  tools.disable_buffering()
  parser = tools.OptionParserWithLogging(
      usage='%prog <options>', log_file=RUN_ISOLATED_LOG_FILE)

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  parser.add_option_group(source_group)

  # --remote is registered on the 'Data source' group after that group was
  # added to the parser.
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Each usage error is sent to the log before parser.error() reports it.
  if bool(options.isolated) == bool(options.hash):
    message = 'One and only one of --isolated or --hash is required.'
    logging.debug(message)
    parser.error(message)
  if args:
    message = 'Unsupported args %s' % ' '.join(args)
    logging.debug(message)
    parser.error(message)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  isolated = options.isolated or options.hash
  if options.download:
    return download_test_data(isolated, options.download, options.remote)

  try:
    return run_tha_test(isolated, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001675
1676
if __name__ == '__main__':
  # Fix the encoding first so that main() always runs with the correct one.
  fix_encoding.fix_encoding()
  exit_code = main()
  sys.exit(exit_code)