blob: b421a561b103a9fa7629fa63cdcd03f4a855f26f [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000013import httplib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import json
15import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000016import logging.handlers
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import optparse
18import os
19import Queue
20import re
21import shutil
22import stat
23import subprocess
24import sys
25import tempfile
26import threading
27import time
28import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000029import urllib2
30import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031
32
# Kinds of mapping that link_file() knows how to perform.
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of exactly 40 hexadecimal characters, e.g. a SHA-1.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder used when the real size of a file is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read at a time while downloading and unzipping a file.
ZIPPED_FILE_CHUNK = 16 * 1024

# Name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# Directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Log file used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay, in seconds, between two logging statements while the required files
# are being retrieved. It lets the user (or buildbot) know that the program is
# still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30
58
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000059
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
63
64
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
68
69
csharp@chromium.orga92403f2012-11-20 15:13:59 +000070class DownloadFileOpener(urllib.FancyURLopener):
71 """This class is needed to get urlretrive to raise an exception on
72 404 errors, instead of still writing to the file with the error code.
73 """
74 def http_error_default(self, url, fp, errcode, errmsg, headers):
75 raise urllib2.HTTPError(url, errcode, errmsg, headers, fp)
76
77
def get_flavor():
  """Returns the flavor name of the host system, 'linux' when unrecognized.

  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
89
90
def os_link(source, link_name):
  """Creates the hard link |link_name| pointing to |source|.

  Adds support for os.link() on Windows by calling CreateHardLinkW() through
  ctypes.

  Raises:
    OSError: the hard link could not be created.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  # CreateHardLinkW() returns zero on failure.
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    # Name both paths so the failure can be diagnosed from the message alone.
    raise OSError(
        'Failed to create hard link %r pointing to %r' % (link_name, source))
99
100
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and grants read access to everyone."""
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | everyone_can_read)
107
108
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| with the kind of link selected by |action|.

  Arguments:
  - outfile: path to create. It must not already exist as a file.
  - infile: existing file to map.
  - action: one of HARDLINK, SYMLINK or COPY.

  Raises ValueError on an unknown |action| and MappingError when |infile| is
  missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s', infile, outfile)
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested, or a symlink was requested on Windows
  # where symlinks are converted to hardlinks. Fall back to a copy on failure.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s', infile, outfile)
    readable_copy(outfile, infile)
135
136
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
  - path: file, directory or symlink to modify. A symlink is never followed;
          it is skipped when os.lchmod() is not available.
  - read_only: if True, only the owner's read and execute bits are kept
               (mode & 0500); otherwise the owner's write bit is added.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The file type must be read from the unmodified mode: masking with 0500
  # strips the type bits, which would hide the symlink and let os.chmod()
  # modify the file the link points to.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s', path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
154
155
def make_writable(root, read_only):
  """Toggles the write bit on everything found under a directory tree.

  Arguments:
  - root: absolute path of the tree to walk. |root| itself is not modified,
          only the files and directories found below it.
  - read_only: forwarded to _set_write_bit() for every entry.
  """
  assert os.path.isabs(root), root
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Files are processed before the sub directories of each directory.
    for name in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, name), read_only)
165
166
def rmtree(root):
  """Deletes the tree at |root|, retrying automatically on Windows.

  The tree is made writable first. On Windows, the deletion is attempted up to
  3 times, sleeping 2, 4 then 6 seconds after each failure.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(1, 4):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
  # NOTE(review): when the last attempt fails too, the function returns
  # without raising and the tree is left behind -- confirm this is intended.
182
183
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
200
201
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
212
213
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary directory is used when it is on the same file system
  as |root_dir|; otherwise the directory is created next to |root_dir|.

  Returns the path of the new directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent = None
  else:
    parent = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent)
220
221
def _is_sha1(value):
  """Returns True if |value| is a string made of 40 hexadecimal characters."""
  return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))


def _validate_isolated_files(files):
  """Raises ConfigError if the 'files' section of a .isolated is malformed."""
  if not isinstance(files, dict):
    raise ConfigError('Expected dict, got %r' % files)
  for path, properties in files.iteritems():
    if not isinstance(path, basestring):
      raise ConfigError('Expected string, got %r' % path)
    if not isinstance(properties, dict):
      raise ConfigError('Expected dict, got %r' % properties)
    for name, prop in properties.iteritems():
      if name == 'l':
        if not isinstance(prop, basestring):
          raise ConfigError('Expected string, got %r' % prop)
      elif name in ('m', 's'):
        # Large values are decoded by json as long instead of int, e.g. the
        # size of a big file on a 32 bits python.
        if not isinstance(prop, (int, long)):
          raise ConfigError('Expected int, got %r' % prop)
      elif name == 'h':
        if not _is_sha1(prop):
          raise ConfigError('Expected sha-1, got %r' % prop)
      else:
        raise ConfigError('Unknown subsubkey %s' % name)
    if 'h' in properties and 'l' in properties:
      raise ConfigError(
          'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
          properties)


def load_isolated(content):
  """Verifies the .isolated file is valid and returns the decoded json data.

  Arguments:
  - content: json encoded text of the .isolated file.

  Returns the decoded dict.

  Raises ConfigError if |content| is not valid json, is not a dict, has an
  unknown key, has a value of an unexpected type or has an 'os' value that
  differs from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      _validate_isolated_files(value)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # A non-string item is reported as a ConfigError instead of leaking a
        # TypeError from the regexp match.
        if not _is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
299
300
def fix_python_path(cmd):
  """Returns a copy of |cmd| fixed to call the right python executable.

  A leading 'python' is replaced with sys.executable, and sys.executable is
  prepended when the first item is a .py file. |cmd| is not modified.
  """
  program = cmd[0]
  if program == 'python':
    return [sys.executable] + cmd[1:]
  if program.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
309
310
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000311class ThreadPool(object):
312 """Implements a multithreaded worker pool oriented for mapping jobs with
313 thread-local result storage.
314 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000315 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000316
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000317 def __init__(self, initial_threads, max_threads, queue_size):
318 logging.debug(
319 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
320 assert initial_threads <= max_threads
321 # Update this check once 256 cores CPU are common.
322 assert max_threads <= 256
323
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000324 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000325 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000326
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000327 # Mutables.
328 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000329 self._num_of_added_tasks = 0
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000330 self._outputs_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000331 self._outputs = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000332 self._exceptions_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000333 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000334 # Number of threads in wait state.
335 self._ready_lock = threading.Lock()
336 self._ready = 0
337 self._workers_lock = threading.Lock()
338 self._workers = []
339 for _ in range(initial_threads):
340 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000341
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000342 def _add_worker(self):
343 """Adds one worker thread if there isn't too many. Thread-safe."""
344 # Better to take the lock two times than hold it for too long.
345 with self._workers_lock:
346 if len(self._workers) >= self._max_threads:
347 return False
348 worker = threading.Thread(target=self._run)
349 with self._workers_lock:
350 if len(self._workers) >= self._max_threads:
351 return False
352 self._workers.append(worker)
353 worker.daemon = True
354 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000355
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000356 def add_task(self, func, *args, **kwargs):
357 """Adds a task, a function to be executed by a worker.
358
359 The function's return value will be stored in the the worker's thread local
360 outputs list.
361 """
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000362 priority = 0
363 with self._ready_lock:
364 start_new_worker = not self._ready
365 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000366 self._num_of_added_tasks += 1
367 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000368 self.tasks.put((priority, index, func, args, kwargs))
369 if start_new_worker:
370 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000371
372 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000373 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000374 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000375 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000376 with self._ready_lock:
377 self._ready += 1
378 task = self.tasks.get()
379 finally:
380 with self._ready_lock:
381 self._ready -= 1
382 try:
383 if task is None:
384 # We're done.
385 return
386 _priority, _index, func, args, kwargs = task
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000387 out = func(*args, **kwargs)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000388 with self._outputs_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000389 self._outputs.append(out)
390 except Exception as e:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000391 exc_info = sys.exc_info()
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000392 with self._exceptions_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000393 self._exceptions.append(exc_info)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000394 logging.error('Caught exception! %s' % e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000395 finally:
396 self.tasks.task_done()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000397
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000398 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000399 """Extracts all the results from each threads unordered.
400
401 Call repeatedly to extract all the exceptions if desired.
402 """
403 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000404 self.tasks.join()
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000405 with self._exceptions_lock:
406 if self._exceptions:
407 e = self._exceptions.pop(0)
408 raise e[0], e[1], e[2]
409 with self._outputs_lock:
410 out = self._outputs
411 self._outputs = []
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000412 return out
413
414 def close(self):
415 """Closes all the threads."""
416 for _ in range(len(self._workers)):
417 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000418 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000419 for t in self._workers:
420 t.join()
421
422 def __enter__(self):
423 """Enables 'with' statement."""
424 return self
425
426 def __exit__(self, exc_type, exc_value, traceback):
427 """Enables 'with' statement."""
428 self.close()
429
430
def valid_file(filepath, size):
  """Returns True if the given file appears valid.

  Currently only the size of the file is checked. A |size| equal to
  UNKNOWN_FILE_SIZE is always accepted, without looking at the file.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000443
444
class Profiler(object):
  """Context manager logging the time spent in the enclosed section."""

  def __init__(self, name):
    # Name of the section, used in the log message.
    self.name = name
    # Set when the section is entered.
    self.start_time = None

  def __enter__(self):
    """Records the time at which the section starts."""
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Logs the time elapsed since the section was entered."""
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
458
459
460class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000461 """Priority based worker queue to fetch or upload files from a
462 content-address server. Any function may be given as the fetcher/upload,
463 as long as it takes two inputs (the item contents, and their relative
464 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000465
466 Supports local file system, CIFS or http remotes.
467
468 When the priority of items is equals, works in strict FIFO mode.
469 """
470 # Initial and maximum number of worker threads.
471 INITIAL_WORKERS = 2
472 MAX_WORKERS = 16
473 # Priorities.
474 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
475 INTERNAL_PRIORITY_BITS = (1<<8) - 1
476 RETRIES = 5
477
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000478 def __init__(self, destination_root):
479 # Function to fetch a remote object or upload to a remote location..
480 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000481 # Contains tuple(priority, index, obj, destination).
482 self._queue = Queue.PriorityQueue()
483 # Contains tuple(priority, index, obj).
484 self._done = Queue.PriorityQueue()
485
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000486 # Contains generated exceptions that haven't been handled yet.
487 self._exceptions = Queue.Queue()
488
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000489 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
490 # thread-safe.
491 self._next_index = xrange(0, 1<<30).__iter__().next
492
493 # Control access to the following member.
494 self._ready_lock = threading.Lock()
495 # Number of threads in wait state.
496 self._ready = 0
497
498 # Control access to the following member.
499 self._workers_lock = threading.Lock()
500 self._workers = []
501 for _ in range(self.INITIAL_WORKERS):
502 self._add_worker()
503
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000504 def join(self):
505 """Blocks until the queue is empty."""
506 self._queue.join()
507
508 def next_exception(self):
509 """Returns the next unhandled exception, or None if there is
510 no exception."""
511 try:
512 return self._exceptions.get_nowait()
513 except Queue.Empty:
514 return None
515
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000516 def add_item(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000517 """Retrieves an object from the remote data store.
518
519 The smaller |priority| gets fetched first.
520
521 Thread-safe.
522 """
523 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000524 self._add_to_queue(priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000525
526 def get_result(self):
527 """Returns the next file that was successfully fetched."""
528 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000529 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000530 # It's an exception.
531 raise r[2][0], r[2][1], r[2][2]
532 return r[2]
533
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000534 def _add_to_queue(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000535 with self._ready_lock:
536 start_new_worker = not self._ready
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000537 self._queue.put((priority, self._next_index(), obj, dest, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000538 if start_new_worker:
539 self._add_worker()
540
541 def _add_worker(self):
542 """Add one worker thread if there isn't too many. Thread-safe."""
543 with self._workers_lock:
544 if len(self._workers) >= self.MAX_WORKERS:
545 return False
546 worker = threading.Thread(target=self._run)
547 self._workers.append(worker)
548 worker.daemon = True
549 worker.start()
550
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000551 def _step_done(self, result):
552 """Worker helper function"""
553 self._done.put(result)
554 self._queue.task_done()
555 if result[0] == -1:
556 self._exceptions.put(sys.exc_info())
557
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000558 def _run(self):
559 """Worker thread loop."""
560 while True:
561 try:
562 with self._ready_lock:
563 self._ready += 1
564 item = self._queue.get()
565 finally:
566 with self._ready_lock:
567 self._ready -= 1
568 if not item:
569 return
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000570 priority, index, obj, dest, size = item
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000571 try:
572 self._do_item(obj, dest)
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000573 if size and not valid_file(dest, size):
574 download_size = os.stat(dest).st_size
575 os.remove(dest)
576 raise IOError('File incorrect size after download of %s. Got %s and '
maruel@chromium.org3f039182012-11-27 21:32:41 +0000577 'expected %s' % (obj, download_size, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000578 except IOError:
579 # Retry a few times, lowering the priority.
580 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000581 self._add_to_queue(priority + 1, obj, dest, size)
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000582 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000583 continue
584 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000585 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000586 except:
587 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000588 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000589 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000590 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000591
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000592 def get_file_handler(self, file_or_url): # pylint: disable=R0201
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000593 """Returns a object to retrieve objects from a remote."""
594 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000595 def download_file(item, dest):
596 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
597 # easy.
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000598 try:
csharp@chromium.orgaa2d1512012-12-05 21:17:39 +0000599 zipped_source = file_or_url + item
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000600 logging.debug('download_file(%s)', zipped_source)
601 connection = urllib2.urlopen(zipped_source)
602 decompressor = zlib.decompressobj()
maruel@chromium.org3f039182012-11-27 21:32:41 +0000603 size = 0
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000604 with open(dest, 'wb') as f:
605 while True:
606 chunk = connection.read(ZIPPED_FILE_CHUNK)
607 if not chunk:
608 break
maruel@chromium.org3f039182012-11-27 21:32:41 +0000609 size += len(chunk)
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000610 f.write(decompressor.decompress(chunk))
611 # Ensure that all the data was properly decompressed.
612 uncompressed_data = decompressor.flush()
613 assert not uncompressed_data
csharp@chromium.orga110d792013-01-07 16:16:16 +0000614 except httplib.HTTPException as e:
615 raise IOError('Encountered an HTTPException.\n%s' % e)
csharp@chromium.org186d6232012-11-26 14:36:12 +0000616 except zlib.error as e:
maruel@chromium.org5dd75dd2012-12-03 15:11:32 +0000617 # Log the first bytes to see if it's uncompressed data.
618 logging.warning('%r', e[:512])
maruel@chromium.org3f039182012-11-27 21:32:41 +0000619 raise IOError(
620 'Problem unzipping data for item %s. Got %d bytes.\n%s' %
621 (item, size, e))
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000622
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000623 return download_file
624
625 def copy_file(item, dest):
626 source = os.path.join(file_or_url, item)
csharp@chromium.orgffd8cf02013-01-09 21:57:38 +0000627 if source == dest:
628 logging.info('Source and destination are the same, no action required')
629 return
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000630 logging.debug('copy_file(%s, %s)', source, dest)
631 shutil.copy(source, dest)
632 return copy_file
633
634
class CachePolicies(object):
  """Holds the retention policies applied to a cache."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
649
650
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    """
    Arguments:
    - target_directory: Directory where the files are downloaded.
    - remote: Remote where to fetch items from.
    """
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Get the request file, waiting until it was fetched."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_result()

  def wait_for(self, items):
    """Download the first item of the given list if it is missing.

    Returns the item.
    """
    item = next(items.iterkeys())
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the path of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
679
680
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000681class Cache(object):
682 """Stateful LRU cache.
683
684 Saves its state as json file.
685 """
686 STATE_FILE = 'state.json'
687
688 def __init__(self, cache_dir, remote, policies):
689 """
690 Arguments:
691 - cache_dir: Directory where to place the cache.
692 - remote: Remote where to fetch items from.
693 - policies: cache retention policies.
694 """
695 self.cache_dir = cache_dir
696 self.remote = remote
697 self.policies = policies
698 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
699 # The tuple(file, size) are kept as an array in a LRU style. E.g.
700 # self.state[0] is the oldest item.
701 self.state = []
maruel@chromium.org770993b2012-12-11 17:16:48 +0000702 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000703 # A lookup map to speed up searching.
704 self._lookup = {}
maruel@chromium.org770993b2012-12-11 17:16:48 +0000705 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000706
707 # Items currently being fetched. Keep it local to reduce lock contention.
708 self._pending_queue = set()
709
710 # Profiling values.
711 self._added = []
712 self._removed = []
713 self._free_disk = 0
714
maruel@chromium.org770993b2012-12-11 17:16:48 +0000715 with Profiler('Setup'):
716 if not os.path.isdir(self.cache_dir):
717 os.makedirs(self.cache_dir)
718 if os.path.isfile(self.state_file):
719 try:
720 self.state = json.load(open(self.state_file, 'r'))
721 except (IOError, ValueError), e:
722 # Too bad. The file will be overwritten and the cache cleared.
723 logging.error(
724 'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
725 self._state_need_to_be_saved = True
726 if (not isinstance(self.state, list) or
727 not all(
728 isinstance(i, (list, tuple)) and len(i) == 2
729 for i in self.state)):
730 # Discard.
731 self._state_need_to_be_saved = True
732 self.state = []
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000733
maruel@chromium.org770993b2012-12-11 17:16:48 +0000734 # Ensure that all files listed in the state still exist and add new ones.
735 previous = set(filename for filename, _ in self.state)
736 if len(previous) != len(self.state):
737 logging.warn('Cache state is corrupted, found duplicate files')
738 self._state_need_to_be_saved = True
739 self.state = []
740
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000741 added = 0
742 for filename in os.listdir(self.cache_dir):
743 if filename == self.STATE_FILE:
744 continue
745 if filename in previous:
746 previous.remove(filename)
747 continue
748 # An untracked file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000749 if not RE_IS_SHA1.match(filename):
750 logging.warn('Removing unknown file %s from cache', filename)
751 os.remove(self.path(filename))
maruel@chromium.org770993b2012-12-11 17:16:48 +0000752 continue
753 # Insert as the oldest file. It will be deleted eventually if not
754 # accessed.
755 self._add(filename, False)
756 logging.warn('Add unknown file %s to cache', filename)
757 added += 1
758
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000759 if added:
760 logging.warn('Added back %d unknown files', added)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000761 if previous:
762 logging.warn('Removed %d lost files', len(previous))
763 # Set explicitly in case self._add() wasn't called.
764 self._state_need_to_be_saved = True
765 # Filter out entries that were not found while keeping the previous
766 # order.
767 self.state = [
768 (filename, size) for filename, size in self.state
769 if filename not in previous
770 ]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000771 self.trim()
772
def __enter__(self):
  """Enters the 'with' block; the instance itself is the managed object."""
  return self
775
def __exit__(self, _exc_type, _exec_value, _traceback):
  """Trims the cache one last time and logs the statistics of the session.

  The three exception arguments are ignored and nothing is returned, so an
  exception raised inside the 'with' body keeps propagating.
  """
  with Profiler('CleanupTrimming'):
    self.trim()

  # Each line reports a number of entries and their cumulated size in kb.
  added_kb = sum(self._added) / 1024
  logging.info('%5d (%8dkb) added', len(self._added), added_kb)

  current_kb = sum(entry[1] for entry in self.state) / 1024
  logging.info('%5d (%8dkb) current', len(self.state), current_kb)

  removed_kb = sum(self._removed) / 1024
  logging.info('%5d (%8dkb) removed', len(self._removed), removed_kb)

  free_kb = self._free_disk / 1024
  logging.info(' %8dkb free', free_kb)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000789
def remove_file_at_index(self, index):
  """Drops the entry at |index| from the state and deletes its file.

  The entry is removed from the bookkeeping (state, lookup table, removed
  sizes) before the file is deleted, so a failure to delete the file is only
  logged and the entry stays forgotten.
  """
  self._state_need_to_be_saved = True
  entry = self.state.pop(index)
  filename, size = entry
  # The lookup table may already be stale, in which case |filename| may not be
  # present in it yet; hence the tolerant pop().
  self._lookup_is_stale = True
  self._lookup.pop(filename, None)
  self._removed.append(size)
  target = self.path(filename)
  try:
    os.remove(target)
  except OSError as err:
    logging.error('Error attempting to delete a file\n%s' % err)
803
def remove_lru_file(self):
  """Removes the least recently used file.

  self.state is ordered from least to most recently used, so that file is the
  entry at index 0.
  """
  self.remove_file_at_index(0)
807
def trim(self):
  """Evicts least recently used entries until every cache policy is respected.

  The policies are applied in order: maximum cumulated size of the cache,
  maximum number of items, then minimum free disk space. A policy whose value
  is falsy is skipped. The state is saved at the end.
  """
  policies = self.policies

  # Ensure maximum cache size.
  if policies.max_cache_size and self.state:
    # Keep a running total instead of summing the whole state again after each
    # eviction, which made this loop quadratic in the number of entries.
    total = sum(i[1] for i in self.state)
    while total > policies.max_cache_size:
      # remove_lru_file() always drops the entry at index 0.
      total -= self.state[0][1]
      self.remove_lru_file()

  # Ensure maximum number of items in the cache.
  if policies.max_items and self.state:
    while len(self.state) > policies.max_items:
      self.remove_lru_file()

  # Ensure enough free space. The free space is measured again after each
  # eviction since deleting a file does not necessarily free its size.
  self._free_disk = get_free_space(self.cache_dir)
  while (
      policies.min_free_space and
      self.state and
      self._free_disk < policies.min_free_space):
    self.remove_lru_file()
    self._free_disk = get_free_space(self.cache_dir)

  self.save()
830
def retrieve(self, priority, item, size):
  """Makes sure |item| is in the cache or is being fetched from the remote.

  If the item is already cached and valid_file() accepts it for |size|, it is
  only moved to the most recently used position. If it is cached but rejected,
  it is removed and requested again. An item that is already pending is not
  requested twice.
  """
  assert '/' not in item
  destination = self.path(item)
  self._update_lookup()
  position = self._lookup.get(item)

  if position is not None:
    if valid_file(destination, size):
      assert position < len(self.state)
      # Was already in cache. Update its LRU value by putting it at the end.
      self._state_need_to_be_saved = True
      self._lookup_is_stale = True
      self.state.append(self.state.pop(position))
      return
    # Cached but invalid: forget it and fall through to request it again.
    self.remove_file_at_index(position)

  if item in self._pending_queue:
    # Already pending. The same object could be referenced multiple times.
    return
  self.remote.add_item(priority, item, destination, size)
  self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000860
def add(self, filepath, obj):
  """Forcibly adds the file at |filepath| to the cache under the name |obj|.

  Nothing is done when |obj| is already tracked. Otherwise the file is linked
  into the cache with link_file(..., HARDLINK) and registered as the most
  recently used entry.
  """
  self._update_lookup()
  if obj in self._lookup:
    return
  link_file(self.path(obj), filepath, HARDLINK)
  self._add(obj, True)
867
def path(self, item):
  """Returns the path of |item| inside the cache directory."""
  cache_root = self.cache_dir
  return os.path.join(cache_root, item)
871
def save(self):
  """Saves the LRU ordering to the state file, if it changed since last save.

  The file is written through a 'with' block so the handle is closed, and the
  data flushed to disk, before the state is marked as saved.
  """
  if not self._state_need_to_be_saved:
    return
  # Text mode mirrors the 'r' mode used to load the file. The compact JSON
  # emitted here contains no newline, so the bytes written are the same as in
  # binary mode.
  with open(self.state_file, 'w') as f:
    json.dump(self.state, f, separators=(',', ':'))
  self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000877
def wait_for(self, items):
  """Blocks until at least one of |items| is available and returns it.

  An item that is already in the cache is returned right away. Otherwise every
  item must be pending; results are taken from the remote one at a time, each
  being registered in the cache, until one of them is in |items|.
  """
  # Flush items already present.
  self._update_lookup()
  for candidate in items:
    if candidate in self._lookup:
      return candidate

  assert all(i in self._pending_queue for i in items), (
      items, self._pending_queue)
  # Note that:
  #   len(self._pending_queue) ==
  #       ( len(self.remote._workers) - self.remote._ready +
  #         len(self._remote._queue) + len(self._remote.done))
  # There is no lock-free way to verify that.
  while self._pending_queue:
    fetched = self.remote.get_result()
    self._pending_queue.remove(fetched)
    self._add(fetched, True)
    if fetched in items:
      return fetched
  return None
902
def _add(self, item, at_end):
  """Registers |item|, a file already present in the cache directory.

  Its size is read from disk. With |at_end| set, the item becomes the most
  recently used entry and self._lookup is updated in place. Otherwise it
  becomes the least recently used entry, self._lookup becomes inconsistent and
  self._update_lookup() must be called.
  """
  size = os.stat(self.path(item)).st_size
  self._added.append(size)
  self._state_need_to_be_saved = True
  entry = (item, size)
  if not at_end:
    # Every index stored in the lookup table is shifted by the insertion.
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  self.state.append(entry)
  self._lookup[item] = len(self.state) - 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000918
def _update_lookup(self):
  """Rebuilds the filename -> index table from self.state when it is stale."""
  if not self._lookup_is_stale:
    return
  rebuilt = {}
  for position, (filename, _) in enumerate(self.state):
    rebuilt[filename] = position
  self._lookup = rebuilt
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000924
925
class IsolatedFile(object):
  """One .isolated file of the include tree, identified by its sha-1."""

  def __init__(self, obj_hash):
    """|obj_hash| is the sha-1 of the .isolated file this object stands for."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash

    # Raw data, as returned by load_isolated().
    self.data = {}
    # One IsolatedFile instance per entry of the 'includes' key.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| with load_isolated() and creates the child nodes.

    Must be called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    parsed = load_isolated(content)
    self.data = parsed
    self.children = [
      IsolatedFile(included) for included in parsed.get('includes', [])
    ]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Registers the files of this .isolated that |files| does not list yet.

    Each newly registered file that has a hash ('h' key) is preemptively
    requested from |cache|. Nothing happens when this .isolated is not loaded
    yet or when its files were already fetched.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s', filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
980
981
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important
    so that a file in an included .isolated file that is overridden by an
    embedding .isolated file is not fetched needlessly. The includes are
    fetched in one pass and the files are fetched as soon as all the ones on
    the left-side of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for
    both deep and wide trees. A deep one is a long link of .isolated files
    referenced one at a time by one item in 'includes'. A wide one has a large
    number of 'includes' in a single .isolated file. 'left' is defined as an
    included .isolated file earlier in the 'includes' list. So the order of the
    elements in 'includes' is important.

    Raises ConfigError when a .isolated file is included more than once.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the set of every hash requested so far to refuse recursive
    # includes. It must be extended each time an include is requested,
    # otherwise only an include of the root itself would ever be detected.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file right away instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        content = f.read()
      item.load(content)
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1063
1064
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|.

  |files| is a sized iterable of relative file paths. Each ancestor directory
  of each path is created with os.mkdir(), parents first.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collect the directory of each file along with all of its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees that a parent is created before its children.
  for relative in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative))
1077
1078
def create_links(base_directory, files):
  """Creates any links needed by the given set of files.

  |files| is an iterable of (filepath, properties) pairs. An entry is a link
  when its properties have the 'l' key, whose value is the link target.
  Entries without that key are ignored. When the 'm' key is present and the
  platform provides os.lchmod, the mode is applied to the link itself.
  """
  for filepath, properties in files:
    # The key must be the same as the one read below; testing for 'link' while
    # reading 'l' meant that no link was ever created.
    if 'l' not in properties:
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'l' property should never be
    # specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      # os.lchmod is not available on every platform.
      lchmod = getattr(os, 'lchmod', None)
      if lchmod:
        lchmod(outfile, properties['m'])
1092
1093
def setup_commands(base_directory, cwd, cmd):
  """Returns the working directory and the command needed to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. The first element of |cmd| is modified in place to use the
  native path separator, then |cmd| goes through fix_python_path().
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  working_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(working_dir):
    os.makedirs(working_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)

  return working_dir, fix_python_path(cmd)
1108
1109
def generate_remaining_files(files):
  """Groups the files that have to be downloaded by their hash.

  |files| is an iterable of (filepath, properties) pairs. Returns a dict
  mapping each 'h' value to the list of (filepath, properties) pairs sharing
  it, in iteration order. Entries without the 'h' key are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1118
1119
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies of |isolated_hash| into |target_directory|.

  Nothing is run; the command to use is printed at the end. Returns 0 on
  success and 1 when the .isolated tree defines no command.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      destinations = remaining.pop(obj)
      last_position = len(destinations) - 1

      for position, (filepath, properties) in enumerate(destinations):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The same object may be mapped at several places: copy it for all of
        # them but the last one, which takes over the downloaded file.
        if position == last_position:
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if 'm' in properties:
          # It's not set on Windows.
          os.chmod(outfile, properties['m'])

      elapsed = time.time() - last_update
      if elapsed > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1175
1176
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the sha-1 of a .isolated file or the path to a
  local .isolated file, in which case the file is first added to the cache
  under its own sha-1.

  Returns the exit code of the command, or 1 when the .isolated tree defines no
  command. The temporary directory is always deleted before returning.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # The file is read in binary mode so that the digest is the one of
          # the bytes linked into the cache, and the handle is closed right
          # away instead of being leaked.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1241
1242
def main():
  """Parses the command line, sets up logging, then downloads or runs a test.

  Returns the exit code of the process: the value returned by
  download_test_data() when --download is used, otherwise the value returned by
  run_tha_test(), or 1 when run_tha_test() raises. Invalid arguments are
  reported through parser.error().
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): options.no_run is not read anywhere in this function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  # Register the group only once all of its options are defined.
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache, '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # No -v logs errors only on the console, -v adds info, -vv and more debug.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The log file always gets everything, whatever the verbosity.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # The filtering is done by each handler, so the root logger lets all pass.
  logging.getLogger().setLevel(logging.DEBUG)

  # The errors are also logged so that they end up in the log file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception as e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001347
1348
# Only run when executed as a script; the value returned by main() becomes the
# exit code of the process.
if __name__ == '__main__':
  sys.exit(main())