blob: 8ed9452e7fd862026d71e2400537aad01942aaa1 [file] [log] [blame]
#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Reads a .isolated, creates a tree of hardlinks and runs the test.

Keeps a local cache.
"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000013import httplib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import json
15import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000016import logging.handlers
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import optparse
18import os
19import Queue
20import re
21import shutil
22import stat
23import subprocess
24import sys
25import tempfile
26import threading
27import time
28import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000029import urllib2
30import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031
32
# Mapping actions understood by link_file().
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a hex encoded sha-1 digest: exactly 40 hexadecimal characters.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size for items whose real size is not known; generally used for
# .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read per iteration when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# File name of the log written by this script.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# Directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Log file to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay (in seconds) between two logging statements while the required files
# are being retrieved. It lets the user (or buildbot) know that the program is
# still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30
58
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000059
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
63
64
class MappingError(OSError):
  """Raised when the tree of files cannot be recreated."""
68
69
class DownloadFileOpener(urllib.FancyURLopener):
  """URL opener that raises on HTTP errors.

  Without it, urlretrieve() does not raise on a 404 error and still writes to
  the destination file, with the error code.
  """

  def http_error_default(self, url, fp, errcode, errmsg, headers):
    """Converts the failed response into a urllib2.HTTPError exception."""
    raise urllib2.HTTPError(url, errcode, errmsg, headers, fp)
76
77
def get_flavor():
  """Returns the system default flavor, derived from sys.platform.

  Copied from gyp/pylib/gyp/common.py. Any platform that is not explicitly
  listed is reported as 'linux'.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
89
90
def os_link(source, link_name):
  """Creates a hardlink named |link_name| that points to |source|.

  Adds support for os.link() on Windows, where the link is created with
  CreateHardLinkW().

  Raises:
    OSError: if the hardlink could not be created. On Windows the exception
        is built from the last Win32 error so it carries its code and message.
  """
  if sys.platform == 'win32':
    if not ctypes.windll.kernel32.CreateHardLinkW(
        unicode(link_name), unicode(source), 0):
      # ctypes.WinError() returns an OSError subclass describing
      # GetLastError(), instead of an empty OSError that hides the reason of
      # the failure. Callers catching OSError are unaffected.
      raise ctypes.WinError()
  else:
    os.link(source, link_name)
98 os.link(source, link_name)
99
100
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone.

  The read bits for user, group and other are added to whatever mode the copy
  ended up with.
  """
  shutil.copy(infile, outfile)
  readable_by_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = os.stat(outfile).st_mode
  os.chmod(outfile, copied_mode | readable_by_all)
107
108
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile|; |action| selects the kind of link.

  |action| is one of HARDLINK, SYMLINK or COPY. A hardlink that fails with
  OSError is replaced by a readable copy.

  Raises:
    ValueError: |action| is not a known mapping action.
    MappingError: |infile| is not a file or |outfile| is already a file.
  """
  logging.debug('Mapping %s to %s', infile, outfile)
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a HARDLINK, or a SYMLINK on Windows: there, symlinks are converted
  # to hardlinks and fail over to a copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s', infile, outfile)
    readable_copy(outfile, infile)
135
136
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  Arguments:
  - path: file, directory or symlink to modify. The mode is read with
          os.lstat() so a symlink is inspected itself, not its target.
  - read_only: if True, the mode is masked with 0500 (at most owner read and
               execute remain); otherwise the owner write bit (0200) is added.

  When os.lchmod() is not available, symlinks are left untouched.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200

  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # The file type must be tested on the mode as returned by lstat(): masking
  # with 0500 clears the file type bits, which would make a symlink look like
  # a regular file and let os.chmod() modify the link's target.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s', path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
153 os.chmod(path, mode)
154
155
def make_writable(root, read_only):
  """Toggles the writable bit on everything found below |root|.

  Every file and directory yielded by os.walk(root) is passed to
  _set_write_bit() together with |read_only|; files of a directory are
  processed before its subdirectories. |root| must be an absolute path.
  """
  assert os.path.isabs(root), root
  for parent, subdirs, files in os.walk(root, topdown=True):
    for name in files + subdirs:
      _set_write_bit(os.path.join(parent, name), read_only)
165
166
def rmtree(root):
  """Deletes the tree |root| after making its content writable.

  Wrapper around shutil.rmtree() that retries automatically on Windows: up to
  three attempts are made, sleeping 2, 4 then 6 seconds after each failure.
  If all three attempts fail, the function returns without raising.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(1, 4):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay +
          '\n')
      time.sleep(delay)
182
183
def is_same_filesystem(path1, path2):
  """Tells whether |path1| and |path2| live on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute; the comparison is done on the st_dev value of each path.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
199 return os.stat(path1).st_dev == os.stat(path2).st_dev
200
201
202def get_free_space(path):
203 """Returns the number of free bytes."""
204 if sys.platform == 'win32':
205 free_bytes = ctypes.c_ulonglong(0)
206 ctypes.windll.kernel32.GetDiskFreeSpaceExW(
207 ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
208 return free_bytes.value
maruel@chromium.orgf43e68b2012-10-15 20:23:10 +0000209 # For OSes other than Windows.
210 f = os.statvfs(path) # pylint: disable=E1101
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000211 return f.f_bfree * f.f_frsize
212
213
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary location is used when it shares its file system with
  |root_dir|; otherwise the directory is created next to |root_dir|, in its
  parent directory. Returns the path of the new directory.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # Let tempfile pick its default location.
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
219 return tempfile.mkdtemp(prefix=prefix, dir=base_temp_dir)
220
221
def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: json encoded text of the .isolated file.

  Returns:
    The decoded dict, as loaded from json.

  Raises:
    ConfigError: |content| is not valid json, is not a dict, contains an
        unknown key or a value of an unexpected type, or targets another OS
        than the one returned by get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  def is_sha1(candidate):
    # RE_IS_SHA1.match() raises TypeError on anything that is not a string;
    # check the type first so that malformed data is reported as a
    # ConfigError like every other validation failure.
    return (
        isinstance(candidate, basestring) and
        bool(RE_IS_SHA1.match(candidate)))

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        # Per file properties: 'l' is a link, 'h' a sha-1; 'm' and 's' are
        # integers.
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
298 return data
299
300
def fix_python_path(cmd):
  """Returns a copy of |cmd| fixed to call the right python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item ends with '.py'. |cmd| itself is not modified.
  """
  first = cmd[0]
  if first == 'python':
    return [sys.executable] + cmd[1:]
  if first.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
308 return out
309
310
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000311class ThreadPool(object):
312 """Implements a multithreaded worker pool oriented for mapping jobs with
313 thread-local result storage.
314 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000315 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000316
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000317 def __init__(self, num_threads, queue_size=0):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000318 logging.debug('Creating ThreadPool')
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000319 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000320 self._workers = [
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000321 threading.Thread(target=self._run, name='worker-%d' % i)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000322 for i in range(num_threads)
323 ]
324
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000325 self._lock = threading.Lock()
326 self._num_of_added_tasks = 0
327 self._outputs = []
328 self._exceptions = []
329
330 for w in self._workers:
331 w.daemon = True
332 w.start()
333
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000334 def add_task(self, func, *args, **kwargs):
335 """Adds a task, a function to be executed by a worker.
336
337 The function's return value will be stored in the the worker's thread local
338 outputs list.
339 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000340 with self._lock:
341 self._num_of_added_tasks += 1
342 index = self._num_of_added_tasks
343 self.tasks.put((index, func, args, kwargs))
344
345 def _run(self):
346 """Runs until a None task is queued."""
347 while True:
348 task = self.tasks.get()
349 if task is None:
350 # We're done.
351 return
352 try:
353 # The first item is the index.
354 _, func, args, kwargs = task
355 out = func(*args, **kwargs)
356 with self._lock:
357 self._outputs.append(out)
358 except Exception as e:
359 logging.error('Caught exception! %s' % e)
360 exc_info = sys.exc_info()
361 with self._lock:
362 self._exceptions.append(exc_info)
363 finally:
364 self.tasks.task_done()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000365
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000366 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000367 """Extracts all the results from each threads unordered.
368
369 Call repeatedly to extract all the exceptions if desired.
370 """
371 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000372 self.tasks.join()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000373 if self._exceptions:
374 e = self._exceptions.pop(0)
375 raise e[0], e[1], e[2]
376 out = self._outputs
377 self._outputs = []
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000378 return out
379
380 def close(self):
381 """Closes all the threads."""
382 for _ in range(len(self._workers)):
383 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000384 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000385 for t in self._workers:
386 t.join()
387
388 def __enter__(self):
389 """Enables 'with' statement."""
390 return self
391
392 def __exit__(self, exc_type, exc_value, traceback):
393 """Enables 'with' statement."""
394 self.close()
395
396
def valid_file(filepath, size):
  """Determines if the given file appears valid.

  Currently only the file's size is checked against |size|. When |size| is
  UNKNOWN_FILE_SIZE, the file is considered valid without being inspected.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000409
410
class Profiler(object):
  """Context manager logging the time spent inside its 'with' block."""

  def __init__(self, name):
    # |name| identifies the section in the log line.
    self.name = name
    # Set when the 'with' block is entered.
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
424
425
426class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000427 """Priority based worker queue to fetch or upload files from a
428 content-address server. Any function may be given as the fetcher/upload,
429 as long as it takes two inputs (the item contents, and their relative
430 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000431
432 Supports local file system, CIFS or http remotes.
433
434 When the priority of items is equals, works in strict FIFO mode.
435 """
436 # Initial and maximum number of worker threads.
437 INITIAL_WORKERS = 2
438 MAX_WORKERS = 16
439 # Priorities.
440 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
441 INTERNAL_PRIORITY_BITS = (1<<8) - 1
442 RETRIES = 5
443
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000444 def __init__(self, destination_root):
445 # Function to fetch a remote object or upload to a remote location..
446 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000447 # Contains tuple(priority, index, obj, destination).
448 self._queue = Queue.PriorityQueue()
449 # Contains tuple(priority, index, obj).
450 self._done = Queue.PriorityQueue()
451
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000452 # Contains generated exceptions that haven't been handled yet.
453 self._exceptions = Queue.Queue()
454
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000455 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
456 # thread-safe.
457 self._next_index = xrange(0, 1<<30).__iter__().next
458
459 # Control access to the following member.
460 self._ready_lock = threading.Lock()
461 # Number of threads in wait state.
462 self._ready = 0
463
464 # Control access to the following member.
465 self._workers_lock = threading.Lock()
466 self._workers = []
467 for _ in range(self.INITIAL_WORKERS):
468 self._add_worker()
469
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000470 def join(self):
471 """Blocks until the queue is empty."""
472 self._queue.join()
473
474 def next_exception(self):
475 """Returns the next unhandled exception, or None if there is
476 no exception."""
477 try:
478 return self._exceptions.get_nowait()
479 except Queue.Empty:
480 return None
481
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000482 def add_item(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000483 """Retrieves an object from the remote data store.
484
485 The smaller |priority| gets fetched first.
486
487 Thread-safe.
488 """
489 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000490 self._add_to_queue(priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000491
492 def get_result(self):
493 """Returns the next file that was successfully fetched."""
494 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000495 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000496 # It's an exception.
497 raise r[2][0], r[2][1], r[2][2]
498 return r[2]
499
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000500 def _add_to_queue(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000501 with self._ready_lock:
502 start_new_worker = not self._ready
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000503 self._queue.put((priority, self._next_index(), obj, dest, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000504 if start_new_worker:
505 self._add_worker()
506
507 def _add_worker(self):
508 """Add one worker thread if there isn't too many. Thread-safe."""
509 with self._workers_lock:
510 if len(self._workers) >= self.MAX_WORKERS:
511 return False
512 worker = threading.Thread(target=self._run)
513 self._workers.append(worker)
514 worker.daemon = True
515 worker.start()
516
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000517 def _step_done(self, result):
518 """Worker helper function"""
519 self._done.put(result)
520 self._queue.task_done()
521 if result[0] == -1:
522 self._exceptions.put(sys.exc_info())
523
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000524 def _run(self):
525 """Worker thread loop."""
526 while True:
527 try:
528 with self._ready_lock:
529 self._ready += 1
530 item = self._queue.get()
531 finally:
532 with self._ready_lock:
533 self._ready -= 1
534 if not item:
535 return
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000536 priority, index, obj, dest, size = item
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000537 try:
538 self._do_item(obj, dest)
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000539 if size and not valid_file(dest, size):
540 download_size = os.stat(dest).st_size
541 os.remove(dest)
542 raise IOError('File incorrect size after download of %s. Got %s and '
maruel@chromium.org3f039182012-11-27 21:32:41 +0000543 'expected %s' % (obj, download_size, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000544 except IOError:
545 # Retry a few times, lowering the priority.
546 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000547 self._add_to_queue(priority + 1, obj, dest, size)
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000548 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000549 continue
550 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000551 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000552 except:
553 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000554 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000555 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000556 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000557
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000558 def get_file_handler(self, file_or_url): # pylint: disable=R0201
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000559 """Returns a object to retrieve objects from a remote."""
560 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000561 def download_file(item, dest):
562 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
563 # easy.
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000564 try:
csharp@chromium.orgaa2d1512012-12-05 21:17:39 +0000565 zipped_source = file_or_url + item
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000566 logging.debug('download_file(%s)', zipped_source)
567 connection = urllib2.urlopen(zipped_source)
568 decompressor = zlib.decompressobj()
maruel@chromium.org3f039182012-11-27 21:32:41 +0000569 size = 0
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000570 with open(dest, 'wb') as f:
571 while True:
572 chunk = connection.read(ZIPPED_FILE_CHUNK)
573 if not chunk:
574 break
maruel@chromium.org3f039182012-11-27 21:32:41 +0000575 size += len(chunk)
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000576 f.write(decompressor.decompress(chunk))
577 # Ensure that all the data was properly decompressed.
578 uncompressed_data = decompressor.flush()
579 assert not uncompressed_data
csharp@chromium.orga110d792013-01-07 16:16:16 +0000580 except httplib.HTTPException as e:
581 raise IOError('Encountered an HTTPException.\n%s' % e)
csharp@chromium.org186d6232012-11-26 14:36:12 +0000582 except zlib.error as e:
maruel@chromium.org5dd75dd2012-12-03 15:11:32 +0000583 # Log the first bytes to see if it's uncompressed data.
584 logging.warning('%r', e[:512])
maruel@chromium.org3f039182012-11-27 21:32:41 +0000585 raise IOError(
586 'Problem unzipping data for item %s. Got %d bytes.\n%s' %
587 (item, size, e))
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000588
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000589 return download_file
590
591 def copy_file(item, dest):
592 source = os.path.join(file_or_url, item)
csharp@chromium.orgffd8cf02013-01-09 21:57:38 +0000593 if source == dest:
594 logging.info('Source and destination are the same, no action required')
595 return
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000596 logging.debug('copy_file(%s, %s)', source, dest)
597 shutil.copy(source, dest)
598 return copy_file
599
600
class CachePolicies(object):
  """Holds the limits used to decide when the cache has to be trimmed."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
615
616
class NoCache(object):
  """Stand-in usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Get the request file."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_result()

  def wait_for(self, items):
    """Download the first item of the given list if it is missing.

    Returns the item, i.e. the first key of |items|.
    """
    item = next(items.iterkeys())
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns where |item| is stored in the target directory."""
    return os.path.join(self.target_directory, item)
645
646
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000647class Cache(object):
648 """Stateful LRU cache.
649
650 Saves its state as json file.
651 """
652 STATE_FILE = 'state.json'
653
def __init__(self, cache_dir, remote, policies):
  """
  Arguments:
  - cache_dir: Directory where to place the cache.
  - remote: Remote where to fetch items from.
  - policies: cache retention policies.

  Loads the saved state, reconciles it with the files actually present in
  |cache_dir|, then trims the cache.
  """
  self.cache_dir = cache_dir
  self.remote = remote
  self.policies = policies
  self.state_file = os.path.join(cache_dir, self.STATE_FILE)
  # The tuple(file, size) are kept as an array in a LRU style. E.g.
  # self.state[0] is the oldest item.
  self.state = []
  self._state_need_to_be_saved = False
  # A lookup map to speed up searching.
  self._lookup = {}
  self._lookup_is_stale = True

  # Items currently being fetched. Keep it local to reduce lock contention.
  self._pending_queue = set()

  # Profiling values.
  self._added = []
  self._removed = []
  self._free_disk = 0

  with Profiler('Setup'):
    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        # Use a 'with' block so the file handle is closed right away.
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
        self._state_need_to_be_saved = True
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2
            for i in self.state)):
        # Discard.
        self._state_need_to_be_saved = True
        self.state = []

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      logging.warning('Cache state is corrupted, found duplicate files')
      self._state_need_to_be_saved = True
      self.state = []
      # The state was dropped, so no file is tracked anymore. Forget the
      # stale names too, otherwise the files still on disk would be skipped
      # below instead of being added back as untracked files.
      previous = set()

    added = 0
    for filename in os.listdir(self.cache_dir):
      if filename == self.STATE_FILE:
        continue
      if filename in previous:
        previous.remove(filename)
        continue
      # An untracked file.
      if not RE_IS_SHA1.match(filename):
        logging.warning('Removing unknown file %s from cache', filename)
        os.remove(self.path(filename))
        continue
      # Insert as the oldest file. It will be deleted eventually if not
      # accessed.
      self._add(filename, False)
      logging.warning('Add unknown file %s to cache', filename)
      added += 1

    if added:
      logging.warning('Added back %d unknown files', added)
    if previous:
      # What remains in |previous| was listed in the state but was not found
      # on disk.
      logging.warning('Removed %d lost files', len(previous))
      # Set explicitly in case self._add() wasn't called.
      self._state_need_to_be_saved = True
      # Filter out entries that were not found while keeping the previous
      # order.
      self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
      ]
    self.trim()
738
def __enter__(self):
  """Enables 'with' statement; the cache itself is the context object."""
  return self
741
def __exit__(self, _exc_type, _exec_value, _traceback):
  """Trims the cache, then logs what was added, kept and removed."""
  with Profiler('CleanupTrimming'):
    self.trim()

  added_kb = sum(self._added) / 1024
  logging.info('%5d (%8dkb) added', len(self._added), added_kb)

  current_kb = sum(entry[1] for entry in self.state) / 1024
  logging.info('%5d (%8dkb) current', len(self.state), current_kb)

  removed_kb = sum(self._removed) / 1024
  logging.info('%5d (%8dkb) removed', len(self._removed), removed_kb)

  logging.info(' %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000755
def remove_file_at_index(self, index):
  """Removes the file at the given index.

  The entry is dropped from the state and from the lookup map, its size is
  recorded as removed, then the file is deleted. A failure to delete the file
  is logged and otherwise ignored.
  """
  self._state_need_to_be_saved = True
  filename, size = self.state.pop(index)
  # If the lookup was already stale, its possible the filename was not
  # present yet.
  self._lookup_is_stale = True
  self._lookup.pop(filename, None)
  self._removed.append(size)
  try:
    os.remove(self.path(filename))
  except OSError as e:
    logging.error('Error attempting to delete a file\n%s', e)
769
def remove_lru_file(self):
  """Removes the least recently used file, i.e. the first one in the state."""
  self.remove_file_at_index(0)
773
def trim(self):
  """Enforces the cache policies, evicting least recently used files first.

  Three limits are applied in order, each one only when its policy value is
  truthy: total cached bytes (max_cache_size), number of cached entries
  (max_items) and free disk space (min_free_space). The free space is always
  measured and stored in self._free_disk, and the state is saved afterwards
  through self.save().
  """
  # Ensure maximum cache size. The running total is maintained incrementally
  # instead of re-summing the whole state after every single eviction.
  if self.policies.max_cache_size and self.state:
    total = sum(size for _, size in self.state)
    while total > self.policies.max_cache_size:
      # remove_lru_file() evicts self.state[0]; account for it beforehand.
      total -= self.state[0][1]
      self.remove_lru_file()

  # Ensure maximum number of items in the cache.
  if self.policies.max_items and self.state:
    while len(self.state) > self.policies.max_items:
      self.remove_lru_file()

  # Ensure enough free space.
  self._free_disk = get_free_space(self.cache_dir)
  while (
      self.policies.min_free_space and
      self.state and
      self._free_disk < self.policies.min_free_space):
    self.remove_lru_file()
    self._free_disk = get_free_space(self.cache_dir)

  self.save()
796
def retrieve(self, priority, item, size):
  """Makes sure |item| is cached, requesting it from the remote if needed.

  If the file is already in the cache, verify that it is valid (i.e. it has
  the correct size): a valid entry only gets its LRU position refreshed, an
  invalid one is removed and fetched again. An item that is already pending
  is not requested a second time.
  """
  assert '/' not in item
  destination = self.path(item)
  self._update_lookup()
  position = self._lookup.get(item)

  if position is not None:
    if valid_file(destination, size):
      assert position < len(self.state)
      # Was already in cache. Update its LRU value by putting it at the end.
      self._state_need_to_be_saved = True
      self._lookup_is_stale = True
      self.state.append(self.state.pop(position))
      return
    # The cached copy failed validation; drop it and fetch it again below.
    self.remove_file_at_index(position)

  if item in self._pending_queue:
    # Already pending. The same object could be referenced multiple times.
    return
  self.remote.add_item(priority, item, destination, size)
  self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000826
def add(self, filepath, obj):
  """Forcibly registers the local file |filepath| in the cache as |obj|.

  Nothing happens when |obj| is already tracked. Otherwise the file is linked
  into the cache directory with the HARDLINK action and the entry is appended
  at the end of the LRU state.
  """
  self._update_lookup()
  if obj in self._lookup:
    return
  link_file(self.path(obj), filepath, HARDLINK)
  self._add(obj, True)
833
def path(self, item):
  """Returns the location of |item| inside the cache directory."""
  cache_root = self.cache_dir
  return os.path.join(cache_root, item)
837
def save(self):
  """Saves the LRU ordering to self.state_file when it has changed.

  Nothing is written unless self._state_need_to_be_saved is set; the flag is
  cleared once the state has been dumped as compact JSON.
  """
  if not self._state_need_to_be_saved:
    return
  # Close the file explicitly so the state is flushed to disk right away
  # instead of whenever the file object happens to be garbage collected.
  with open(self.state_file, 'wb') as f:
    json.dump(self.state, f, separators=(',',':'))
  self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000843
def wait_for(self, items):
  """Blocks until at least one of |items| is available in the cache.

  Returns the first of |items| already present in the cache or, failing that,
  the first one delivered by the remote. Every result delivered by the remote
  in the meantime is added to the cache, whether it was asked for or not.
  """
  # Flush items already present.
  self._update_lookup()
  for candidate in items:
    if candidate in self._lookup:
      return candidate

  assert not any(i not in self._pending_queue for i in items), (
      items, self._pending_queue)
  # Note that:
  #   len(self._pending_queue) ==
  #   ( len(self.remote._workers) - self.remote._ready +
  #     len(self._remote._queue) + len(self._remote.done))
  # There is no lock-free way to verify that.
  while self._pending_queue:
    fetched = self.remote.get_result()
    self._pending_queue.remove(fetched)
    self._add(fetched, True)
    if fetched in items:
      return fetched
  return None
868
def _add(self, item, at_end):
  """Records |item|, whose file must already exist, in the internal state.

  With |at_end| set the entry becomes the most recently used one and
  self._lookup stays consistent. Otherwise the entry is inserted at the front,
  self._lookup becomes inconsistent and self._update_lookup() must be called.
  """
  nbytes = os.path.getsize(self.path(item))
  entry = (item, nbytes)
  self._added.append(nbytes)
  self._state_need_to_be_saved = True
  if not at_end:
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  # The new entry lands at the index equal to the current length.
  self._lookup[item] = len(self.state)
  self.state.append(entry)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000884
def _update_lookup(self):
  """Rebuilds the filename -> index table from self.state when it is stale."""
  if not self._lookup_is_stale:
    return
  table = {}
  for position, (name, _) in enumerate(self.state):
    table[name] = position
  self._lookup = table
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000890
891
class IsolatedFile(object):
  """One node of the tree of .isolated files, identified by its sha-1."""
  def __init__(self, obj_hash):
    """Creates a not yet parsed node; |obj_hash| is the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| with load_isolated() and fills in this node.

    One unparsed child node is created per entry of the 'includes' key. A node
    can only be loaded once.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    parsed = load_isolated(content)
    self.data = parsed
    self.children = [
        IsolatedFile(include) for include in parsed.get('includes', [])]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Merges the files of this .isolated file into the |files| dictionary.

    Entries already present in |files| are left untouched. Each newly added
    entry that has an 'h' property is preemptively requested from |cache|.

    Note that |files| is modified by this function. The call is a no-op while
    the node is not parsed yet or once its files were already fetched.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s', filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
946
947
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important
    so that a file in an included .isolated file that is overridden by an
    embedding .isolated file is not fetched needlessly. The includes are
    fetched in one pass and the files are fetched as soon as all the ones on
    the left-side of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for
    both deep and wide trees. A deep one is a long link of .isolated files
    referenced one at a time by one item in 'includes'. A wide one has a large
    number of 'includes' in a single .isolated file. 'left' is defined as an
    included .isolated file earlier in the 'includes' list. So the order of the
    elements in 'includes' is important.

    Once done, self.command, self.files, self.read_only and self.relative_cwd
    hold the merged values; for each property the first node providing a value
    wins.

    Raises:
      ConfigError: if an .isolated file is requested more than once, e.g.
          because of a recursive include.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps every hash requested so far to refuse recursive includes.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file right away instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        # Remember the hash; otherwise only includes pointing back at the
        # root would ever be detected and a deeper cycle would loop forever.
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1029
1030
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|.

  |files| is a sized iterable of relative file paths; the parent directory of
  each file and all of its ancestors are created with os.mkdir().
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    # Walk up so that every intermediate directory gets registered too.
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # A parent is a strict prefix of its children, so it always sorts first and
  # exists by the time they are created.
  for relative in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative))
1043
1044
def create_links(base_directory, files):
  """Creates any symlinks needed by the given set of files.

  Args:
    base_directory: directory under which the links are created.
    files: iterable of (relative path, properties dict) pairs. An entry with
        an 'l' property is a symlink pointing at that value; its optional 'm'
        property is applied to the link itself where os.lchmod is available.
        Entries without an 'l' property are ignored.
  """
  for filepath, properties in files:
    # The link target is stored under the short 'l' key, which is the one read
    # below; testing for 'link' made this function skip every symlink.
    if 'l' not in properties:
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'l' property should
    # never be specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if 'm' in properties:
      lchmod = getattr(os, 'lchmod', None)
      if lchmod:
        lchmod(outfile, properties['m'])
1058
1059
def setup_commands(base_directory, cwd, cmd):
  """Prepares the working directory and the command used to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. The first element of |cmd| is updated in place to use the
  native path separator before the command goes through fix_python_path().

  Returns a (working directory, command) tuple.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  run_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(run_dir):
    os.makedirs(run_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0].replace('/', os.path.sep)
  cmd[0] = executable
  return run_dir, fix_python_path(cmd)
1074
1075
def generate_remaining_files(files):
  """Groups the files that still have to be downloaded by content hash.

  |files| is an iterable of (filepath, properties) pairs. The result maps each
  'h' value to the list of pairs sharing it, in iteration order; entries
  without an 'h' property are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1084
1085
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads an isolated test and its dependencies to |target_directory|.

  The files are retrieved through a NoCache instance rooted at the destination
  and the command to run is printed instead of being executed.

  Returns 1 when the .isolated tree does not define a command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  storage = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(storage, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      digest = storage.wait_for(remaining)
      consumers = remaining.pop(digest)
      last_index = len(consumers) - 1

      for index, (filepath, properties) in enumerate(consumers):
        outfile = os.path.join(target_directory, filepath)
        logging.info(storage.path(digest))

        # Every destination but the last one receives a copy; the last one
        # takes over the downloaded file itself.
        if index == last_index:
          os.rename(storage.path(digest), outfile)
        else:
          shutil.copyfile(storage.path(digest), outfile)

        if 'm' in properties:
          # It's not set on Windows.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1141
1142
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the sha-1 of the .isolated file or, when it does
  not look like a sha-1, the path of a local .isolated file which is then
  added to the cache under the sha-1 of its content.

  Returns 1 when there is no command to run, otherwise the return value of
  subprocess.call() for the command. The temporary directory is always
  deleted. An OSError raised when starting the command is reported on stderr
  and re-raised.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Hash the raw bytes: text mode would translate line endings on
          # Windows and yield a digest that does not match the file content.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1207
1208
def main():
  """Parses the command line, configures logging and maps or runs the test.

  Logging goes both to the console, at a level selected by the number of -v
  flags, and to a rotating log file at DEBUG level.

  Returns:
    The value returned by download_test_data() when --download is used,
    otherwise the value returned by run_tha_test(), or 1 if run_tha_test()
    raised an exception (which is logged).
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  parser.add_option_group(group)

  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # The root logger lets everything through; each handler filters on its own.
  logging.getLogger().setLevel(logging.DEBUG)

  # The errors are also logged so that they end up in the log file.
  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception as e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001313
1314
# Only run when executed as a script; the value returned by main() becomes the
# exit code of the process.
if __name__ == '__main__':
  sys.exit(main())