blob: c8e1afb8b1693d709d272bb2155f8f40b723c037 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000013import httplib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import json
15import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000016import logging.handlers
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000017import math
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000018import optparse
19import os
20import Queue
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000021import random
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000022import re
23import shutil
24import stat
25import subprocess
26import sys
27import tempfile
28import threading
29import time
30import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000031import urllib2
csharp@chromium.orgf13eec02013-03-11 18:22:56 +000032import urlparse
csharp@chromium.orga92403f2012-11-20 15:13:59 +000033import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034
35
# Kinds of mapping that link_file() knows how to perform.
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a hex encoded sha-1 digest (exactly 40 hex characters).
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size used when the real size of a file is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read at a time when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# File name of the log written by this script.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# Directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Path of the log used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay (in seconds) between two logging statements while the required files
# are being retrieved. It lets the user (or buildbot) know that the program is
# still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30

# Name of the key under which the count of url attempts is stored.
COUNT_KEY = 'UrlOpenAttempt'

# Maximum number of attempts at opening a url before aborting.
MAX_URL_OPEN_ATTEMPTS = 20
67
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000068
class ConfigError(ValueError):
  """Raised when a .isolated file can't be loaded."""
72
73
class MappingError(OSError):
  """Raised when the tree of files can't be recreated."""
77
78
def get_flavor():
  """Translates sys.platform into a short OS flavor name.

  Any platform that is not explicitly recognized is reported as 'linux'.
  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
90
91
def os_link(source, link_name):
  """Creates the hardlink |link_name| pointing at |source|.

  Adds support for os.link() on Windows by calling CreateHardLinkW().

  Raises OSError if the hardlink can't be created.
  """
  if sys.platform == 'win32':
    if not ctypes.windll.kernel32.CreateHardLinkW(
        unicode(link_name), unicode(source), 0):
      # Name the paths involved so the failure can be diagnosed from logs; a
      # bare OSError() carries no information at all.
      raise OSError(
          'CreateHardLinkW() failed to link %s to %s' % (link_name, source))
  else:
    os.link(source, link_name)
100
101
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile|, then grants read access to user, group and
  others on the copy.
  """
  shutil.copy(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | read_for_all)
108
109
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| through a hardlink, a symlink or a copy.

  |action| must be one of HARDLINK, SYMLINK or COPY, otherwise ValueError is
  raised. MappingError is raised if |infile| is not a file or if |outfile|
  already exists as a file.

  A hardlink that can't be created falls back to a readable copy.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested, or a symlink was requested on Windows
  # where symlinks are converted to hardlinks.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
136
137
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
  - path: file, directory or symlink to modify. A symlink is never followed;
          without os.lchmod() support it is left untouched.
  - read_only: if True, the permissions are masked with 0500 (owner read and
               execute at most); if False, the owner write bit (0200) is
               added to the current permissions.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The symlink test must use the mode returned by lstat(). The '& 0500' mask
  # above strips the file type bits, so testing the masked value would never
  # detect a symlink and os.chmod() would then modify the link's target.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
155
156
def make_writable(root, read_only):
  """Applies _set_write_bit() to every file and directory found below |root|.

  |root| must be an absolute path. |read_only| is forwarded as is to
  _set_write_bit().
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files are processed before the sub directories of each directory.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
166
167
def rmtree(root):
  """Deletes the |root| tree, retrying automatically on Windows.

  The tree is made writable first. On Windows, shutil.rmtree() is attempted up
  to 3 times, sleeping 2, 4 then 6 seconds after each WindowsError.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(1, 4):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write('\n')
      time.sleep(delay)
183
184
def is_same_filesystem(path1, path2):
  """Tells whether |path1| and |path2| live on the same filesystem.

  Both paths must be absolute. This is required to enable the use of
  hardlinks.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    assert re.match(r'^[a-zA-Z]\:\\.*', path1), path1
    assert re.match(r'^[a-zA-Z]\:\\.*', path2), path2
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
201
202
def get_free_space(path):
  """Returns the number of free bytes on the filesystem containing |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    vfs = os.statvfs(path)  # pylint: disable=E1101
    return vfs.f_bfree * vfs.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
213
214
def make_temp_dir(prefix, root_dir):
  """Creates and returns a temporary directory on the same file system as
  |root_dir|.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
221
222
def load_isolated(content):
  """Verifies the .isolated file is valid and returns the decoded json data.

  Arguments:
  - content: raw json text of the .isolated file.

  Returns the decoded dict.

  Raises ConfigError if |content| is not valid json, is not a dict, contains
  an unknown key or contains a value of an unexpected type.
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  def is_sha1(value):
    # The type check comes first: a regexp can't be matched against a
    # non-string and the resulting TypeError would escape as is instead of
    # being reported as a ConfigError.
    return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # A file entry is either content addressed or a link, never both.
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
300
301
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  Arguments:
  - cmd: list of strings making up the command line. It is not modified.

  Returns a new list where a leading 'python' is replaced by sys.executable,
  or where sys.executable is prepended when the first item ends with '.py'.
  An empty |cmd| is returned as an empty list.
  """
  out = cmd[:]
  if not out:
    # Nothing to fix; looking at out[0] would raise IndexError.
    return out
  if out[0] == 'python':
    out[0] = sys.executable
  elif out[0].endswith('.py'):
    out.insert(0, sys.executable)
  return out
310
311
def url_open(url, data=None):
  """Attempts to open the given url multiple times.

  |data| can be either:
    -None for a GET request
    -str for pre-encoded data
    -list for data to be encoded
    -dict for data to be encoded (COUNT_KEY will be added in this case)

  A dict passed as |data| is copied, so the caller's object is left untouched.
  For a GET request, the attempt count is appended to the query string already
  present in |url|, if any.

  Up to MAX_URL_OPEN_ATTEMPTS attempts are made. The wait between two attempts
  grows exponentially with the attempt number, includes a random component and
  is kept between 0.1 and 10 seconds. An HTTP error with a code below 500 is
  not retried.

  Returns a file-like object, where the response may be read from, or None
  if it was unable to connect.
  """
  method = 'GET' if data is None else 'POST'

  if isinstance(data, dict) and COUNT_KEY in data:
    logging.error('%s already existed in the data passed into UlrOpen. It '
                  'would be overwritten. Aborting UrlOpen', COUNT_KEY)
    return None

  if data is None:
    data = {}
  elif isinstance(data, dict):
    # COUNT_KEY is inserted below; do it on a copy so that the dict owned by
    # the caller is not modified behind its back.
    data = data.copy()

  # Parsed once so that every GET attempt is rebuilt from the original url.
  original_parts = urlparse.urlparse(url)

  for attempt in range(MAX_URL_OPEN_ATTEMPTS):
    try:
      if isinstance(data, str):
        encoded_data = data
      else:
        if isinstance(data, dict):
          data[COUNT_KEY] = attempt
        encoded_data = urllib.urlencode(data)

      if method == 'POST':
        # Simply specifying data to urlopen makes it a POST.
        url_response = urllib2.urlopen(url, encoded_data)
      else:
        url_parts = list(original_parts)
        # Index 4 is the query component. Keep the parameters that were
        # already in the url instead of replacing them.
        url_parts[4] = '&'.join(
            part for part in (original_parts.query, encoded_data) if part)
        url = urlparse.urlunparse(url_parts)
        url_response = urllib2.urlopen(url)

      logging.info('url_open(%s) succeeded', url)
      return url_response
    except urllib2.HTTPError as e:
      if e.code < 500:
        # This HTTPError means we reached the server and there was a problem
        # with the request, so don't retry.
        logging.exception('Able to connect to %s but an exception was '
                          'thrown.\n%s', url, e)
        return None

      # The HTTPError was due to a server error, so retry the attempt.
      logging.warning('Able to connect to %s on attempt %d.\nException: %s ',
                      url, attempt, e)

    except (urllib2.URLError, httplib.HTTPException) as e:
      logging.warning('Unable to open url %s on attempt %d.\nException: %s',
                      url, attempt, e)

    # Only sleep if we are going to try again.
    if attempt != MAX_URL_OPEN_ATTEMPTS - 1:
      duration = random.random() * 3 + math.pow(1.5, (attempt + 1))
      duration = min(10, max(0.1, duration))
      time.sleep(duration)

  logging.error('Unable to open given url, %s, after %d attempts.',
                url, MAX_URL_OPEN_ATTEMPTS)
  return None
380
381
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000382class ThreadPool(object):
383 """Implements a multithreaded worker pool oriented for mapping jobs with
384 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000385
386 Arguments:
387 - initial_threads: Number of threads to start immediately. Can be 0 if it is
388 uncertain that threads will be needed.
389 - max_threads: Maximum number of threads that will be started when all the
390 threads are busy working. Often the number of CPU cores.
391 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
392 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000393 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000394 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000395
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000396 def __init__(self, initial_threads, max_threads, queue_size):
397 logging.debug(
398 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
399 assert initial_threads <= max_threads
400 # Update this check once 256 cores CPU are common.
401 assert max_threads <= 256
402
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000403 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000404 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000405
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000406 # Mutables.
407 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000408 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000409 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000410 self._outputs = []
411 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000412 # Number of threads in wait state.
413 self._ready_lock = threading.Lock()
414 self._ready = 0
415 self._workers_lock = threading.Lock()
416 self._workers = []
417 for _ in range(initial_threads):
418 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000419
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000420 def _add_worker(self):
421 """Adds one worker thread if there isn't too many. Thread-safe."""
422 # Better to take the lock two times than hold it for too long.
423 with self._workers_lock:
424 if len(self._workers) >= self._max_threads:
425 return False
426 worker = threading.Thread(target=self._run)
427 with self._workers_lock:
428 if len(self._workers) >= self._max_threads:
429 return False
430 self._workers.append(worker)
431 worker.daemon = True
432 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000433
maruel@chromium.org831958f2013-01-22 15:01:46 +0000434 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000435 """Adds a task, a function to be executed by a worker.
436
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000437 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000438 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000439
440 Returns the index of the item added, e.g. the total number of enqueued items
441 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000442 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000443 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000444 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000445 with self._ready_lock:
446 start_new_worker = not self._ready
447 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000448 self._num_of_added_tasks += 1
449 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000450 self.tasks.put((priority, index, func, args, kwargs))
451 if start_new_worker:
452 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000453 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000454
455 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000456 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000457 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000458 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000459 with self._ready_lock:
460 self._ready += 1
461 task = self.tasks.get()
462 finally:
463 with self._ready_lock:
464 self._ready -= 1
465 try:
466 if task is None:
467 # We're done.
468 return
469 _priority, _index, func, args, kwargs = task
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000470 out = func(*args, **kwargs)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000471 if out is not None:
472 self._outputs_exceptions_cond.acquire()
473 try:
474 self._outputs.append(out)
475 self._outputs_exceptions_cond.notifyAll()
476 finally:
477 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000478 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000479 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000480 exc_info = sys.exc_info()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000481 self._outputs_exceptions_cond.acquire()
482 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000483 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000484 self._outputs_exceptions_cond.notifyAll()
485 finally:
486 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000487 finally:
488 self.tasks.task_done()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000489
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000490 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000491 """Extracts all the results from each threads unordered.
492
493 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000494
495 Note: will wait for all work items to be done before returning an exception.
496 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000497 """
498 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000499 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000500 self._outputs_exceptions_cond.acquire()
501 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000502 if self._exceptions:
503 e = self._exceptions.pop(0)
504 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000505 out = self._outputs
506 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000507 finally:
508 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000509 return out
510
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000511 def get_one_result(self):
512 """Returns the next item that was generated or raises an exception if one
513 occured.
514
515 Warning: this function will hang if there is no work item left. Use join
516 instead.
517 """
518 self._outputs_exceptions_cond.acquire()
519 try:
520 while True:
521 if self._exceptions:
522 e = self._exceptions.pop(0)
523 raise e[0], e[1], e[2]
524 if self._outputs:
525 return self._outputs.pop(0)
526 self._outputs_exceptions_cond.wait()
527 finally:
528 self._outputs_exceptions_cond.release()
529
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000530 def close(self):
531 """Closes all the threads."""
532 for _ in range(len(self._workers)):
533 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000534 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000535 for t in self._workers:
536 t.join()
537
538 def __enter__(self):
539 """Enables 'with' statement."""
540 return self
541
542 def __exit__(self, exc_type, exc_value, traceback):
543 """Enables 'with' statement."""
544 self.close()
545
546
def valid_file(filepath, size):
  """Tells whether the file at |filepath| appears valid.

  Currently only the size of the file is compared against |size|. A file whose
  expected size is UNKNOWN_FILE_SIZE is always considered valid.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000559
560
class Profiler(object):
  """Context manager that logs how long the enclosed section took to run."""

  def __init__(self, name):
    # |name| identifies the section in the log line.
    self.name = name
    # Set when the context is entered.
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
574
575
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equals, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities. The lower 8 bits are reserved to count the retries.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location..
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the task without checking the internal priority bits."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next item processed by the pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success and None when the item was queued again for a
    retry. The IOError is raised again once the retries are exhausted.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a object to retrieve objects from a remote.

    The returned function takes (item, dest). For an http(s) remote, it
    downloads and zlib-decompresses |item| into |dest|; otherwise it copies the
    item from the |file_or_url| directory.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        size = 0
        chunk = ''
        try:
          zipped_source = file_or_url + item
          logging.debug('download_file(%s)', zipped_source)
          connection = url_open(zipped_source)
          if not connection:
            raise IOError('Unable to open connection to %s' % zipped_source)
          try:
            decompressor = zlib.decompressobj()
            with open(dest, 'wb') as f:
              while True:
                chunk = connection.read(ZIPPED_FILE_CHUNK)
                if not chunk:
                  break
                size += len(chunk)
                f.write(decompressor.decompress(chunk))
              # Write whatever the decompressor still buffers so no data is
              # lost, instead of merely asserting that nothing is left.
              f.write(decompressor.flush())
          finally:
            # Release the connection whether the download succeeded or not.
            connection.close()
        except IOError:
          logging.error('Encountered an exception with (%s, %s)' % (item, dest))
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes of the chunk that failed to see if it's
          # uncompressed data. Slicing the exception itself would log its
          # arguments, not the data.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))

      return download_file

    def copy_file(item, dest):
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
691
692
class CachePolicies(object):
  """Plain holder for the cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_cache_size, self.min_free_space, self.max_items = (
        max_cache_size, min_free_space, max_items)
707
708
class NoCache(object):
  """Stand-in usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Fetches |item| into the target directory and waits for one result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing and returns it."""
    item = next(items.iterkeys())
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_one_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns where |item| is located inside the target directory."""
    return os.path.join(self.target_directory, item)
737
738
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.

  Usable as a context manager: leaving the context trims the cache and logs
  statistics about what was added and removed.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    self._state_need_to_be_saved = False
    # A lookup map to speed up searching.
    self._lookup = {}
    self._lookup_is_stale = True

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    with Profiler('Setup'):
      if not os.path.isdir(self.cache_dir):
        os.makedirs(self.cache_dir)
      if os.path.isfile(self.state_file):
        try:
          # Use a context manager so the handle is closed even when the json
          # content is invalid.
          with open(self.state_file, 'r') as f:
            self.state = json.load(f)
        except (IOError, ValueError) as e:
          # Too bad. The file will be overwritten and the cache cleared.
          logging.error(
              'Broken state file %s, ignoring.\n%s', self.STATE_FILE, e)
          self._state_need_to_be_saved = True
        if (not isinstance(self.state, list) or
            not all(
              isinstance(i, (list, tuple)) and len(i) == 2
              for i in self.state)):
          # Discard.
          self._state_need_to_be_saved = True
          self.state = []

      # Ensure that all files listed in the state still exist and add new ones.
      previous = set(filename for filename, _ in self.state)
      if len(previous) != len(self.state):
        logging.warning('Cache state is corrupted, found duplicate files')
        self._state_need_to_be_saved = True
        self.state = []
        # The state was thrown away, so nothing is "known" anymore. Without
        # this reset the files on disk would be skipped below as if they were
        # still tracked, and would stay untracked for this whole run.
        previous = set()

      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        if not RE_IS_SHA1.match(filename):
          logging.warning('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
          continue
        # Insert as the oldest file. It will be deleted eventually if not
        # accessed.
        self._add(filename, False)
        logging.warning('Add unknown file %s to cache', filename)
        added += 1

      if added:
        logging.warning('Added back %d unknown files', added)
      if previous:
        # What is left in |previous| was listed in the state but is not on
        # disk anymore.
        logging.warning('Removed %d lost files', len(previous))
        # Set explicitly in case self._add() wasn't called.
        self._state_need_to_be_saved = True
        # Filter out entries that were not found while keeping the previous
        # order.
        self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
        ]
      self.trim()

  def __enter__(self):
    """Returns the cache itself so it can be bound by a 'with' statement."""
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Trims the cache and logs what was added, kept and removed.

    Returns None, so an exception raised inside the 'with' body propagates.
    """
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%5d (%8dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('       %8dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state before the file is deleted, so a
    failure to delete the file (logged, not raised) still untracks it.
    """
    try:
      self._state_need_to_be_saved = True
      filename, size = self.state.pop(index)
      # If the lookup was already stale, its possible the filename was not
      # present yet.
      self._lookup_is_stale = True
      self._lookup.pop(filename, None)
      self._removed.append(size)
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s', e)

  def remove_lru_file(self):
    """Removes the least recently used file, i.e. self.state[0]."""
    self.remove_file_at_index(0)

  def trim(self):
    """Enforces the cache policies, then saves the state if it changed.

    Oldest entries are removed first until the total size, the number of items
    and the free disk space all satisfy self.policies. A policy set to 0 is
    not enforced.
    """
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.

    The download itself is only queued here; wait_for() collects the result.
    """
    assert not '/' in item
    path = self.path(item)
    self._update_lookup()
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self._state_need_to_be_saved = True
        self._lookup_is_stale = True
        self.state.append(self.state.pop(index))

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache.

    |filepath| is linked into the cache under the name |obj| unless an entry
    named |obj| is already tracked.
    """
    self._update_lookup()
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering, if it changed since the last save."""
    if self._state_need_to_be_saved:
      # Close the file explicitly so the state is flushed to disk right away
      # instead of whenever the file object gets garbage collected.
      with open(self.state_file, 'wb') as f:
        json.dump(self.state, f, separators=(',',':'))
      self._state_need_to_be_saved = False

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    self._update_lookup()
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #       ( len(self.remote._workers) - self.remote._ready +
    #         len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_one_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    The size recorded is the one of the file on disk. With |at_end| True the
    item becomes the most recently used one; otherwise it becomes the oldest.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    self._state_need_to_be_saved = True
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self._lookup_is_stale = True
      self.state.insert(0, (item, size))

  def _update_lookup(self):
    """Rebuilds the filename -> index map if self.state was reordered."""
    if self._lookup_is_stale:
      self._lookup = dict(
          (filename, index) for index, (filename, _) in enumerate(self.state))
      self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000982
983
class IsolatedFile(object):
  """In-memory representation of one parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data, as returned by load_isolated().
    self.data = {}
    # One IsolatedFile instance per entry of the 'includes' key.
    self.children = []

    # Becomes True once everything on the left side of the tree is parsed.
    # 'Tree' means the .isolate and every .isolated file it includes
    # recursively through 'includes'. The order of the sha-1 in 'includes'
    # matters: a later entry is not processed before the earlier ones are
    # retrieved and read.
    self.can_fetch = False
    # Becomes True once load() succeeded.
    self._is_parsed = False
    # Becomes True once fetch_files() requested the files.
    self.files_fetched = False

  def load(self, content):
    """Validates |content| as a .isolated file and stores its json data.

    One child IsolatedFile is created per hash listed in 'includes'. Must be
    called at most once.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = self.data.get('includes', [])
    self.children = [IsolatedFile(child_hash) for child_hash in included]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Registers the files of this .isolated that |files| does not have yet.

    Each newly registered file that has a hash is requested from |cache| right
    away. Nothing happens when the file is not parsed yet or when its files
    were already requested.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # The root isolated wins on the files being mapped: an entry already in
      # |files| overrides this one, which must then not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
1038
1039
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    # Command to run, as a list of arguments.
    self.command = []
    # Maps each file path to its properties dict.
    self.files = {}
    # None until load() resolved them.
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
    - cache: Object providing retrieve(), wait_for() and path().
    - root_isolated_hash: Hash of the main .isolated file.

    Raises ConfigError when an included .isolated file refers back to the root.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the list of retrieved items to refuse recursive includes.
    # NOTE(review): nothing is ever appended to this list, so only an include
    # of the root itself is detected. Left as is because rejecting every
    # repeated hash would also reject a file included from two places.
    retrieved = [root_isolated_hash]

    def update_self(node):
      """Requests the files of |node| and grabs the properties not set yet."""
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node providing a value wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      """Processes every fetchable node, unlocking at most one new child."""
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file right after reading it instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      """Returns True if |n| and all its descendants requested their files."""
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1120 self.read_only = self.read_only or False
1121
1122
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  Arguments:
  - base_directory: Existing directory under which the tree is created.
  - files: Sized iterable of file paths relative to |base_directory|.

  Directories that already exist are left untouched, so the function can be
  called again on a partially populated tree.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collects every directory to create, ancestors included.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Stop at the first directory already seen: its ancestors were collected
    # with it. This also ends the walk on an absolute path, where
    # os.path.dirname('/') returns '/' forever.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # Sorting puts each parent before its children.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    if not os.path.isdir(path):
      os.mkdir(path)
1134 os.mkdir(os.path.join(base_directory, d))
1135
1136
def create_links(base_directory, files):
  """Creates any links needed by the given set of files.

  Arguments:
  - base_directory: Directory under which the links are created.
  - files: Iterable of (filepath, properties) pairs. An entry is a symlink
           when its properties have the 'l' key, whose value is the link
           target; 'm' optionally holds the mode to apply to the link.
  """
  for filepath, properties in files:
    # The link target is stored under 'l', so that is the key to test. Testing
    # 'link' while reading 'l' either skipped every link or raised KeyError.
    if 'l' not in properties:
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'l' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      # lchmod is not available on every platform; skip silently without it.
      lchmod = getattr(os, 'lchmod', None)
      if lchmod:
        lchmod(outfile, properties['m'])
1150
1151
def setup_commands(base_directory, cwd, cmd):
  """Computes the working directory and the command used to run the test.

  Arguments:
  - base_directory: Directory |cwd| is relative to.
  - cwd: Relative path of the working directory; created if missing.
  - cmd: Command as a list. Its first element is modified in place.

  Returns a (working directory, command) tuple.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  run_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(run_dir):
    os.makedirs(run_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0].replace('/', os.path.sep)
  cmd[0] = executable

  return run_dir, fix_python_path(cmd)
1165 return cwd, cmd
1166
1167
def generate_remaining_files(files):
  """Groups the files that have a hash by that hash.

  |files| is an iterable of (filepath, properties) pairs. Returns a dict
  mapping each 'h' value to the list of (filepath, properties) pairs using it;
  entries without 'h' are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1175 return remaining
1176
1177
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  Arguments:
  - isolated_hash: Hash of the .isolated file to download.
  - target_directory: Destination directory; created if missing.
  - remote: Passed to Remote() to fetch the items.

  Returns 0 on success, 1 when the .isolated file has no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      users = remaining.pop(obj)
      downloaded = no_cache.path(obj)
      last_index = len(users) - 1

      for index, (filepath, properties) in enumerate(users):
        outfile = os.path.join(target_directory, filepath)
        logging.info(downloaded)

        # Every user of the item gets a copy, except the last one which takes
        # over the downloaded file itself.
        if index == last_index:
          os.rename(downloaded, outfile)
        else:
          shutil.copyfile(downloaded, outfile)

        if 'm' in properties:
          # It's not set on Windows.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1232 return 0
1233
1234
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
  - isolated_hash: Hash of the .isolated file, or path to a local .isolated
                   file when it does not look like a sha-1.
  - cache_dir: Directory of the local cache.
  - remote: Passed to Remote() to fetch the items.
  - policies: CachePolicies instance handed to the Cache.

  Returns the exit code of the command, or 1 when there is no command to run.
  The temporary directory is deleted in all cases.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Hash the raw bytes: text mode may translate line endings, giving a
          # hash that does not match the file linked into the cache. The
          # context manager also closes the handle right away.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest'):
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...', len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1299
1300
def main():
  """Parses the command line, sets up logging, then downloads or runs a test.

  Returns the process exit code: the one of download_test_data() with
  --download, otherwise the one of run_tha_test(), or 1 if the latter raised.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # Parsed, but not consulted anywhere in this function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  source_group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()

  # Console verbosity follows the number of -v, capped at DEBUG.
  console_levels = (logging.ERROR, logging.INFO, logging.DEBUG)
  level = console_levels[min(2, options.verbose)]
  root_logger = logging.getLogger()

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  root_logger.addHandler(logging_console)

  # The log file always receives everything, whatever the console level is.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(logging_rotating_file)

  root_logger.setLevel(logging.DEBUG)

  # parser.error() exits; the message is logged first so it reaches the file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)

  try:
    return run_tha_test(
        options.isolated or options.hash,
        options.cache,
        options.remote,
        policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001405
1406
# Script entry point: the value returned by main() becomes the exit code.
if __name__ == '__main__':
  sys.exit(main())