blob: 1964c9f3e6c6b62ec0657c1fa79fc9f792f1609c [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000013import httplib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import json
15import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000016import logging.handlers
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import optparse
18import os
19import Queue
20import re
21import shutil
22import stat
23import subprocess
24import sys
25import tempfile
26import threading
27import time
28import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000029import urllib2
30import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031
32
# Actions understood by link_file(); they select how a file gets mapped.
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a string made of exactly 40 hexadecimal characters, i.e. a hex
# encoded SHA-1 digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder used when the real size of a file is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None

# Number of bytes read per iteration when downloading and unzipping files.
ZIPPED_FILE_CHUNK = 16 * 1024

# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'

# The directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# The log file used by the run_test_cases.py command.
RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')

# Delay, in seconds, between two logging statements while the required files
# are being retrieved. It lets the user (or buildbot) know that the program is
# still running.
DELAY_BETWEEN_UPDATES_IN_SECS = 30
58
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000059
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
63
64
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
68
69
csharp@chromium.orga92403f2012-11-20 15:13:59 +000070class DownloadFileOpener(urllib.FancyURLopener):
71 """This class is needed to get urlretrive to raise an exception on
72 404 errors, instead of still writing to the file with the error code.
73 """
74 def http_error_default(self, url, fp, errcode, errmsg, headers):
75 raise urllib2.HTTPError(url, errcode, errmsg, headers, fp)
76
77
def get_flavor():
  """Returns the flavor name of the current platform.

  Known values of sys.platform are translated to 'win', 'mac', 'solaris' or
  'freebsd'; anything else is reported as 'linux'. Copied from
  gyp/pylib/gyp/common.py.
  """
  platform_to_flavor = {
    'cygwin': 'win',
    'win32': 'win',
    'darwin': 'mac',
    'sunos5': 'solaris',
    'freebsd7': 'freebsd',
    'freebsd8': 'freebsd',
  }
  try:
    return platform_to_flavor[sys.platform]
  except KeyError:
    return 'linux'
89
90
def os_link(source, link_name):
  """Creates a hardlink named |link_name| pointing to |source|.

  Uses os.link() everywhere except on Windows, where CreateHardLinkW() is
  called through ctypes. Raises OSError when the Windows call reports failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
99
100
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and grants read access to everyone.

  The read bits for user, group and others are added on top of whatever mode
  the copy ended up with.
  """
  shutil.copy(infile, outfile)
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = os.stat(outfile).st_mode
  os.chmod(outfile, copied_mode | read_for_all)
107
108
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| using the method selected by |action|.

  Arguments:
  - outfile: path to create; it must not already be a file.
  - infile: existing file to map.
  - action: one of HARDLINK, SYMLINK or COPY.

  Raises ValueError for an unknown action and MappingError when |infile| is
  missing or |outfile| already exists. A failed hardlink falls back to a copy.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a hardlink was requested or symlinks are not usable here.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
135
136
137def _set_write_bit(path, read_only):
138 """Sets or resets the executable bit on a file or directory."""
139 mode = os.lstat(path).st_mode
140 if read_only:
141 mode = mode & 0500
142 else:
143 mode = mode | 0200
144 if hasattr(os, 'lchmod'):
145 os.lchmod(path, mode) # pylint: disable=E1101
146 else:
147 if stat.S_ISLNK(mode):
148 # Skip symlink without lchmod() support.
149 logging.debug('Can\'t change +w bit on symlink %s' % path)
150 return
151
152 # TODO(maruel): Implement proper DACL modification on Windows.
153 os.chmod(path, mode)
154
155
def make_writable(root, read_only):
  """Applies _set_write_bit() to everything found below |root|.

  |root| must be an absolute path. The tree is walked top-down and, in each
  directory, the files are processed before the sub-directories. |root| itself
  is not modified.
  """
  assert os.path.isabs(root)
  for parent, subdirs, files in os.walk(root, topdown=True):
    # Concatenating builds a new list, so the |subdirs| list used by os.walk()
    # to recurse is left untouched.
    for entry in files + subdirs:
      _set_write_bit(os.path.join(parent, entry), read_only)
165
166
def rmtree(root):
  """Deletes the tree |root|, retrying automatically on Windows.

  The tree is made writable first. On Windows, up to three attempts are made,
  sleeping 2, 4 then 6 seconds after each failure; a failure of the last
  attempt is not reported to the caller.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in (1, 2, 3):
    try:
      shutil.rmtree(root)
      return
    except WindowsError:  # pylint: disable=E0602
      delay = attempt * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write('\n')
      time.sleep(delay)
182
183
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks. The decision is based on
  the st_dev value of both paths; on Windows, paths with a different drive
  letter are reported as different without looking further.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path

  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False

  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
200
201
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|.

  Uses os.statvfs() (free blocks times fragment size) everywhere except on
  Windows, where GetDiskFreeSpaceExW() is called through ctypes.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
212
213
def make_temp_dir(prefix, root_dir):
  """Creates and returns a temporary directory on |root_dir|'s file system.

  The system temporary directory is used when it is on the same filesystem as
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # None lets tempfile pick its default location.
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
220
221
def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: the json text of the .isolated file.

  Returns the decoded dict.

  Raises ConfigError when the text is not valid json, when the top level value
  is not a dict, when an unknown key is found or when a value doesn't have the
  expected type or format. The 'os' value, when present, must match
  get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  def check_sha1(candidate):
    # RE_IS_SHA1.match() raises TypeError on anything but a string; report a
    # malformed file with ConfigError like every other validation failure.
    if (not isinstance(candidate, basestring) or
        not RE_IS_SHA1.match(candidate)):
      raise ConfigError('Expected sha-1, got %r' % candidate)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            # Link destination.
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey in ('m', 's'):
            # Both 'm' and 's' must hold an integer.
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            check_sha1(subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # An entry is either content addressed or a link, never both.
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        check_sha1(subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
299
300
def fix_python_path(cmd):
  """Returns a copy of |cmd| that runs with the current python executable.

  A leading 'python' is replaced with sys.executable, and sys.executable is
  prepended when the first item ends with '.py'. |cmd| is left unmodified.
  """
  fixed = cmd[:]
  program = fixed[0]
  if program == 'python':
    fixed[0] = sys.executable
  elif program.endswith('.py'):
    fixed.insert(0, sys.executable)
  return fixed
309
310
class WorkerThread(threading.Thread):
  """Daemon thread that runs tasks from a queue and keeps their results.

  The return value of each task is appended to |outputs| and the exc_info of
  each failing task is appended to |exceptions|. The thread starts itself as
  soon as it is constructed.
  """
  def __init__(self, tasks, *args, **kwargs):
    super(WorkerThread, self).__init__(*args, **kwargs)
    self._tasks = tasks
    self.outputs = []
    self.exceptions = []

    self.daemon = True
    self.start()

  def run(self):
    """Runs until a None task is queued."""
    # iter() with a sentinel stops at the first None task; task_done() is not
    # called for that one.
    for task in iter(self._tasks.get, None):
      try:
        func, args, kwargs = task
        result = func(*args, **kwargs)
        self.outputs.append(result)
      except Exception as e:
        logging.error('Caught exception! %s' % e)
        self.exceptions.append(sys.exc_info())
      finally:
        self._tasks.task_done()
337
338
339class ThreadPool(object):
340 """Implements a multithreaded worker pool oriented for mapping jobs with
341 thread-local result storage.
342 """
343 QUEUE_CLASS = Queue.Queue
344
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000345 def __init__(self, num_threads, queue_size=0):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000346 logging.debug('Creating ThreadPool')
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000347 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000348 self._workers = [
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000349 WorkerThread(self.tasks, name='worker-%d' % i)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000350 for i in range(num_threads)
351 ]
352
353 def add_task(self, func, *args, **kwargs):
354 """Adds a task, a function to be executed by a worker.
355
356 The function's return value will be stored in the the worker's thread local
357 outputs list.
358 """
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000359 self.tasks.put((func, args, kwargs))
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000360
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000361 def join(self):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000362 """Extracts all the results from each threads unordered."""
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000363 self.tasks.join()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000364 out = []
365 # Look for exceptions.
366 for w in self._workers:
367 if w.exceptions:
368 raise w.exceptions[0][0], w.exceptions[0][1], w.exceptions[0][2]
369 out.extend(w.outputs)
370 w.outputs = []
371 return out
372
373 def close(self):
374 """Closes all the threads."""
375 for _ in range(len(self._workers)):
376 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000377 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000378 for t in self._workers:
379 t.join()
380
381 def __enter__(self):
382 """Enables 'with' statement."""
383 return self
384
385 def __exit__(self, exc_type, exc_value, traceback):
386 """Enables 'with' statement."""
387 self.close()
388
389
def valid_file(filepath, size):
  """Tells whether the file at |filepath| appears valid.

  Currently only the size is compared with |size|. A |size| equal to
  UNKNOWN_FILE_SIZE is always accepted, without looking at the file.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True

  actual_size = os.stat(filepath).st_size
  sizes_match = (size == actual_size)
  if not sizes_match:
    logging.warning(
        'Found invalid item %s; %d != %d',
        os.path.basename(filepath), actual_size, size)
  return sizes_match
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000402
403
class Profiler(object):
  """Context manager logging the time spent inside its 'with' block."""

  def __init__(self, name):
    # Label of the section, used in the log message.
    self.name = name
    # Set by __enter__().
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
417
418
419class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000420 """Priority based worker queue to fetch or upload files from a
421 content-address server. Any function may be given as the fetcher/upload,
422 as long as it takes two inputs (the item contents, and their relative
423 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000424
425 Supports local file system, CIFS or http remotes.
426
427 When the priority of items is equals, works in strict FIFO mode.
428 """
429 # Initial and maximum number of worker threads.
430 INITIAL_WORKERS = 2
431 MAX_WORKERS = 16
432 # Priorities.
433 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
434 INTERNAL_PRIORITY_BITS = (1<<8) - 1
435 RETRIES = 5
436
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000437 def __init__(self, destination_root):
438 # Function to fetch a remote object or upload to a remote location..
439 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000440 # Contains tuple(priority, index, obj, destination).
441 self._queue = Queue.PriorityQueue()
442 # Contains tuple(priority, index, obj).
443 self._done = Queue.PriorityQueue()
444
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000445 # Contains generated exceptions that haven't been handled yet.
446 self._exceptions = Queue.Queue()
447
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000448 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
449 # thread-safe.
450 self._next_index = xrange(0, 1<<30).__iter__().next
451
452 # Control access to the following member.
453 self._ready_lock = threading.Lock()
454 # Number of threads in wait state.
455 self._ready = 0
456
457 # Control access to the following member.
458 self._workers_lock = threading.Lock()
459 self._workers = []
460 for _ in range(self.INITIAL_WORKERS):
461 self._add_worker()
462
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000463 def join(self):
464 """Blocks until the queue is empty."""
465 self._queue.join()
466
467 def next_exception(self):
468 """Returns the next unhandled exception, or None if there is
469 no exception."""
470 try:
471 return self._exceptions.get_nowait()
472 except Queue.Empty:
473 return None
474
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000475 def add_item(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000476 """Retrieves an object from the remote data store.
477
478 The smaller |priority| gets fetched first.
479
480 Thread-safe.
481 """
482 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000483 self._add_to_queue(priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000484
485 def get_result(self):
486 """Returns the next file that was successfully fetched."""
487 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000488 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000489 # It's an exception.
490 raise r[2][0], r[2][1], r[2][2]
491 return r[2]
492
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000493 def _add_to_queue(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000494 with self._ready_lock:
495 start_new_worker = not self._ready
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000496 self._queue.put((priority, self._next_index(), obj, dest, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000497 if start_new_worker:
498 self._add_worker()
499
500 def _add_worker(self):
501 """Add one worker thread if there isn't too many. Thread-safe."""
502 with self._workers_lock:
503 if len(self._workers) >= self.MAX_WORKERS:
504 return False
505 worker = threading.Thread(target=self._run)
506 self._workers.append(worker)
507 worker.daemon = True
508 worker.start()
509
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000510 def _step_done(self, result):
511 """Worker helper function"""
512 self._done.put(result)
513 self._queue.task_done()
514 if result[0] == -1:
515 self._exceptions.put(sys.exc_info())
516
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000517 def _run(self):
518 """Worker thread loop."""
519 while True:
520 try:
521 with self._ready_lock:
522 self._ready += 1
523 item = self._queue.get()
524 finally:
525 with self._ready_lock:
526 self._ready -= 1
527 if not item:
528 return
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000529 priority, index, obj, dest, size = item
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000530 try:
531 self._do_item(obj, dest)
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000532 if size and not valid_file(dest, size):
533 download_size = os.stat(dest).st_size
534 os.remove(dest)
535 raise IOError('File incorrect size after download of %s. Got %s and '
maruel@chromium.org3f039182012-11-27 21:32:41 +0000536 'expected %s' % (obj, download_size, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000537 except IOError:
538 # Retry a few times, lowering the priority.
539 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000540 self._add_to_queue(priority + 1, obj, dest, size)
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000541 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000542 continue
543 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000544 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000545 except:
546 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000547 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000548 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000549 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000550
csharp@chromium.org59c7bcf2012-11-21 21:13:18 +0000551 def get_file_handler(self, file_or_url): # pylint: disable=R0201
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000552 """Returns a object to retrieve objects from a remote."""
553 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000554 def download_file(item, dest):
555 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
556 # easy.
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000557 try:
csharp@chromium.orgaa2d1512012-12-05 21:17:39 +0000558 zipped_source = file_or_url + item
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000559 logging.debug('download_file(%s)', zipped_source)
560 connection = urllib2.urlopen(zipped_source)
561 decompressor = zlib.decompressobj()
maruel@chromium.org3f039182012-11-27 21:32:41 +0000562 size = 0
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000563 with open(dest, 'wb') as f:
564 while True:
565 chunk = connection.read(ZIPPED_FILE_CHUNK)
566 if not chunk:
567 break
maruel@chromium.org3f039182012-11-27 21:32:41 +0000568 size += len(chunk)
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000569 f.write(decompressor.decompress(chunk))
570 # Ensure that all the data was properly decompressed.
571 uncompressed_data = decompressor.flush()
572 assert not uncompressed_data
csharp@chromium.orga110d792013-01-07 16:16:16 +0000573 except httplib.HTTPException as e:
574 raise IOError('Encountered an HTTPException.\n%s' % e)
csharp@chromium.org186d6232012-11-26 14:36:12 +0000575 except zlib.error as e:
maruel@chromium.org5dd75dd2012-12-03 15:11:32 +0000576 # Log the first bytes to see if it's uncompressed data.
577 logging.warning('%r', e[:512])
maruel@chromium.org3f039182012-11-27 21:32:41 +0000578 raise IOError(
579 'Problem unzipping data for item %s. Got %d bytes.\n%s' %
580 (item, size, e))
csharp@chromium.orga92403f2012-11-20 15:13:59 +0000581
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000582 return download_file
583
584 def copy_file(item, dest):
585 source = os.path.join(file_or_url, item)
csharp@chromium.orgffd8cf02013-01-09 21:57:38 +0000586 if source == dest:
587 logging.info('Source and destination are the same, no action required')
588 return
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000589 logging.debug('copy_file(%s, %s)', source, dest)
590 shutil.copy(source, dest)
591 return copy_file
592
593
class CachePolicies(object):
  """Plain holder for the cache retention limits."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
608
609
class NoCache(object):
  """Stand-in for the Cache class that doesn't cache anything.

  This class is intended to be usable everywhere the Cache class is. Instead
  of downloading to a cache, all files are downloaded to the target directory
  and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.target_directory = target_directory
    self.remote = remote

  def retrieve(self, priority, item, size):
    """Fetches |item| into the target directory and waits for the result."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_result()

  def wait_for(self, items):
    """Downloads the first item of |items| if it is missing, and returns it."""
    item = next(items.iterkeys())
    destination = self.path(item)

    if not os.path.exists(destination):
      self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
      downloaded = self.remote.get_result()
      assert downloaded == item

    return item

  def path(self, item):
    """Returns the location of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
638
639
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000640class Cache(object):
641 """Stateful LRU cache.
642
643 Saves its state as json file.
644 """
645 STATE_FILE = 'state.json'
646
647 def __init__(self, cache_dir, remote, policies):
648 """
649 Arguments:
650 - cache_dir: Directory where to place the cache.
651 - remote: Remote where to fetch items from.
652 - policies: cache retention policies.
653 """
654 self.cache_dir = cache_dir
655 self.remote = remote
656 self.policies = policies
657 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
658 # The tuple(file, size) are kept as an array in a LRU style. E.g.
659 # self.state[0] is the oldest item.
660 self.state = []
maruel@chromium.org770993b2012-12-11 17:16:48 +0000661 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000662 # A lookup map to speed up searching.
663 self._lookup = {}
maruel@chromium.org770993b2012-12-11 17:16:48 +0000664 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000665
666 # Items currently being fetched. Keep it local to reduce lock contention.
667 self._pending_queue = set()
668
669 # Profiling values.
670 self._added = []
671 self._removed = []
672 self._free_disk = 0
673
maruel@chromium.org770993b2012-12-11 17:16:48 +0000674 with Profiler('Setup'):
675 if not os.path.isdir(self.cache_dir):
676 os.makedirs(self.cache_dir)
677 if os.path.isfile(self.state_file):
678 try:
679 self.state = json.load(open(self.state_file, 'r'))
680 except (IOError, ValueError), e:
681 # Too bad. The file will be overwritten and the cache cleared.
682 logging.error(
683 'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
684 self._state_need_to_be_saved = True
685 if (not isinstance(self.state, list) or
686 not all(
687 isinstance(i, (list, tuple)) and len(i) == 2
688 for i in self.state)):
689 # Discard.
690 self._state_need_to_be_saved = True
691 self.state = []
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000692
maruel@chromium.org770993b2012-12-11 17:16:48 +0000693 # Ensure that all files listed in the state still exist and add new ones.
694 previous = set(filename for filename, _ in self.state)
695 if len(previous) != len(self.state):
696 logging.warn('Cache state is corrupted, found duplicate files')
697 self._state_need_to_be_saved = True
698 self.state = []
699
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000700 added = 0
701 for filename in os.listdir(self.cache_dir):
702 if filename == self.STATE_FILE:
703 continue
704 if filename in previous:
705 previous.remove(filename)
706 continue
707 # An untracked file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000708 if not RE_IS_SHA1.match(filename):
709 logging.warn('Removing unknown file %s from cache', filename)
710 os.remove(self.path(filename))
maruel@chromium.org770993b2012-12-11 17:16:48 +0000711 continue
712 # Insert as the oldest file. It will be deleted eventually if not
713 # accessed.
714 self._add(filename, False)
715 logging.warn('Add unknown file %s to cache', filename)
716 added += 1
717
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000718 if added:
719 logging.warn('Added back %d unknown files', added)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000720 if previous:
721 logging.warn('Removed %d lost files', len(previous))
722 # Set explicitly in case self._add() wasn't called.
723 self._state_need_to_be_saved = True
724 # Filter out entries that were not found while keeping the previous
725 # order.
726 self.state = [
727 (filename, size) for filename, size in self.state
728 if filename not in previous
729 ]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000730 self.trim()
731
732 def __enter__(self):
733 return self
734
735 def __exit__(self, _exc_type, _exec_value, _traceback):
736 with Profiler('CleanupTrimming'):
737 self.trim()
738
739 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +0000740 '%5d (%8dkb) added', len(self._added), sum(self._added) / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000741 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +0000742 '%5d (%8dkb) current',
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000743 len(self.state),
744 sum(i[1] for i in self.state) / 1024)
745 logging.info(
maruel@chromium.org5fd6f472012-12-11 00:26:08 +0000746 '%5d (%8dkb) removed', len(self._removed), sum(self._removed) / 1024)
747 logging.info(' %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000748
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000749 def remove_file_at_index(self, index):
750 """Removes the file at the given index."""
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000751 try:
maruel@chromium.org770993b2012-12-11 17:16:48 +0000752 self._state_need_to_be_saved = True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000753 filename, size = self.state.pop(index)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000754 # If the lookup was already stale, its possible the filename was not
755 # present yet.
756 self._lookup_is_stale = True
757 self._lookup.pop(filename, None)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000758 self._removed.append(size)
759 os.remove(self.path(filename))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000760 except OSError as e:
761 logging.error('Error attempting to delete a file\n%s' % e)
762
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000763 def remove_lru_file(self):
764 """Removes the last recently used file."""
765 self.remove_file_at_index(0)
766
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000767 def trim(self):
768 """Trims anything we don't know, make sure enough free space exists."""
769 # Ensure maximum cache size.
770 if self.policies.max_cache_size and self.state:
771 while sum(i[1] for i in self.state) > self.policies.max_cache_size:
772 self.remove_lru_file()
773
774 # Ensure maximum number of items in the cache.
775 if self.policies.max_items and self.state:
776 while len(self.state) > self.policies.max_items:
777 self.remove_lru_file()
778
779 # Ensure enough free space.
780 self._free_disk = get_free_space(self.cache_dir)
781 while (
782 self.policies.min_free_space and
783 self.state and
784 self._free_disk < self.policies.min_free_space):
785 self.remove_lru_file()
786 self._free_disk = get_free_space(self.cache_dir)
787
788 self.save()
789
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000790 def retrieve(self, priority, item, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000791 """Retrieves a file from the remote, if not already cached, and adds it to
792 the cache.
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000793
794 If the file is in the cache, verifiy that the file is valid (i.e. it is
795 the correct size), retrieving it again if it isn't.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000796 """
797 assert not '/' in item
798 path = self.path(item)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000799 self._update_lookup()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000800 index = self._lookup.get(item)
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000801
802 if index is not None:
803 if not valid_file(self.path(item), size):
804 self.remove_file_at_index(index)
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000805 index = None
806 else:
807 assert index < len(self.state)
808 # Was already in cache. Update it's LRU value by putting it at the end.
maruel@chromium.org770993b2012-12-11 17:16:48 +0000809 self._state_need_to_be_saved = True
810 self._lookup_is_stale = True
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000811 self.state.append(self.state.pop(index))
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000812
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000813 if index is None:
814 if item in self._pending_queue:
815 # Already pending. The same object could be referenced multiple times.
816 return
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000817 self.remote.add_item(priority, item, path, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000818 self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000819
820 def add(self, filepath, obj):
821 """Forcibly adds a file to the cache."""
maruel@chromium.org770993b2012-12-11 17:16:48 +0000822 self._update_lookup()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000823 if not obj in self._lookup:
824 link_file(self.path(obj), filepath, HARDLINK)
825 self._add(obj, True)
826
827 def path(self, item):
828 """Returns the path to one item."""
829 return os.path.join(self.cache_dir, item)
830
831 def save(self):
832 """Saves the LRU ordering."""
maruel@chromium.org770993b2012-12-11 17:16:48 +0000833 if self._state_need_to_be_saved:
834 json.dump(self.state, open(self.state_file, 'wb'), separators=(',',':'))
835 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000836
837 def wait_for(self, items):
838 """Starts a loop that waits for at least one of |items| to be retrieved.
839
840 Returns the first item retrieved.
841 """
842 # Flush items already present.
maruel@chromium.org770993b2012-12-11 17:16:48 +0000843 self._update_lookup()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000844 for item in items:
845 if item in self._lookup:
846 return item
847
848 assert all(i in self._pending_queue for i in items), (
849 items, self._pending_queue)
850 # Note that:
851 # len(self._pending_queue) ==
852 # ( len(self.remote._workers) - self.remote._ready +
853 # len(self._remote._queue) + len(self._remote.done))
854 # There is no lock-free way to verify that.
855 while self._pending_queue:
856 item = self.remote.get_result()
857 self._pending_queue.remove(item)
858 self._add(item, True)
859 if item in items:
860 return item
861
862 def _add(self, item, at_end):
863 """Adds an item in the internal state.
864
865 If |at_end| is False, self._lookup becomes inconsistent and
866 self._update_lookup() must be called.
867 """
868 size = os.stat(self.path(item)).st_size
869 self._added.append(size)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000870 self._state_need_to_be_saved = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000871 if at_end:
872 self.state.append((item, size))
873 self._lookup[item] = len(self.state) - 1
874 else:
maruel@chromium.org770993b2012-12-11 17:16:48 +0000875 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000876 self.state.insert(0, (item, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000877
878 def _update_lookup(self):
maruel@chromium.org770993b2012-12-11 17:16:48 +0000879 if self._lookup_is_stale:
880 self._lookup = dict(
881 (filename, index) for index, (filename, _) in enumerate(self.state))
882 self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000883
884
class IsolatedFile(object):
  """One node of the tree of .isolated files: its hash, content and includes."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the first ones are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| with load_isolated() and creates one child node per
    entry of 'includes'.

    Must be called at most once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included = []
    for child_hash in self.data.get('includes', []):
      included.append(IsolatedFile(child_hash))
    self.children = included
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function. Does nothing if this
    .isolated file is not loaded yet or if its files were already requested.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
939
940
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Raises:
      ConfigError: if a .isolated file is included more than once in the tree.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Hashes of every .isolated file requested so far, to refuse recursive
    # includes. Each newly requested hash must be recorded here, otherwise a
    # cycle that does not go through the root would never be detected.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node processed that defines a property
      # wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetchable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file as soon as it is read instead of leaking the handle.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1022
1023
def create_directories(base_directory, files):
  """Creates under |base_directory| every directory needed by |files|.

  |files| is an iterable of relative file paths. Each directory is created
  with os.mkdir(), parents first.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Collects the directory of each file along with all its ancestors.
  needed = set()
  for filepath in files:
    parent = os.path.dirname(filepath)
    while parent:
      needed.add(parent)
      parent = os.path.dirname(parent)
  # Sorting guarantees a parent is created before its children.
  for relative_dir in sorted(needed):
    os.mkdir(os.path.join(base_directory, relative_dir))
1036
1037
def create_links(base_directory, files):
  """Creates any links needed by the given set of files.

  Arguments:
    base_directory: directory the links are created in.
    files: iterable of (relative path, properties) pairs. Only the entries
        whose properties have a 'l' key, the link destination, are processed.
        The optional 'm' key is the mode to apply to the link itself.
  """
  # os.lchmod() only exists on some platforms.
  lchmod = getattr(os, 'lchmod', None)
  for filepath, properties in files:
    # The link destination is stored under 'l', the same key that is read
    # below; testing for any other key here skips every link.
    if 'l' not in properties:
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'l' property should never be
    # specified for a Windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if lchmod and 'm' in properties:
      lchmod(outfile, properties['m'])
1051
1052
def setup_commands(base_directory, cwd, cmd):
  """Returns the working directory and the command needed to run the test.

  |cwd| must be relative; it is resolved against |base_directory| and created
  if missing. |cmd| is modified in place: its first element gets the native
  path separator. The command returned is the result of fix_python_path().
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  run_dir = os.path.join(base_directory, cwd)
  if not os.path.isdir(run_dir):
    os.makedirs(run_dir)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)
  return run_dir, fix_python_path(cmd)
1067
1068
def generate_remaining_files(files):
  """Groups the files that have content to download by content hash.

  |files| is an iterable of (filepath, properties) pairs. Returns a dict
  mapping each 'h' value to the list of (filepath, properties) pairs using it;
  entries without a 'h' key are left out.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1077
1078
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to |target_directory| without running them.

  Returns 1 if the .isolated file defines no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds') as _prof:
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    print >> sys.stderr, 'No command to run'
    return 1

  with Profiler('GetRest') as _prof:
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.iteritems())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, settings.command[:])

    remaining = generate_remaining_files(settings.files.iteritems())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      mappings = remaining.pop(obj)
      last_index = len(mappings) - 1

      for index, (filepath, properties) in enumerate(mappings):
        source = no_cache.path(obj)
        outfile = os.path.join(target_directory, filepath)
        logging.info(source)

        # The same content can be mapped at several paths: copy it for all
        # of them but the last, which takes over the downloaded file.
        if index != last_index:
          shutil.copyfile(source, outfile)
        else:
          os.rename(source, outfile)

        if 'm' in properties:
          # It's not set on Windows.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...' % len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1134
1135
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the sha-1 of the root .isolated file or, when it
  does not look like a sha-1, the path to a local .isolated file.

  Returns 1 if the .isolated file defines no command, otherwise the value
  returned by subprocess.call(). The temporary directory is always removed.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Hash the raw bytes: text mode can alter the content on platforms
          # that translate line endings. Also close the file once read.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.iteritems())
        remaining = generate_remaining_files(settings.files.iteritems())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...' % len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      rmtree(outdir)
1200
1201
def main():
  """Parses the command line, sets up logging, then downloads or runs the test.

  Returns the value returned by download_test_data() or run_tha_test(), or 1
  if run_tha_test() raised an exception.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): --no-run is parsed but never consulted in this function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Download')
  group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]

  # The console only shows messages at the level selected with --verbose.
  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  logging.getLogger().addHandler(logging_console)

  # The rotating log file always gets everything.
  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  logging.getLogger().addHandler(logging_rotating_file)

  # Let every record through the root logger; each handler filters on its
  # own level.
  logging.getLogger().setLevel(logging.DEBUG)

  if bool(options.isolated) == bool(options.hash):
    logging.debug('One and only one of --isolated or --hash is required.')
    parser.error('One and only one of --isolated or --hash is required.')
  if args:
    logging.debug('Unsupported args %s' % ' '.join(args))
    parser.error('Unsupported args %s' % ' '.join(args))

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  if options.download:
    return download_test_data(options.isolated or options.hash,
                              options.download, options.remote)
  else:
    try:
      return run_tha_test(
          options.isolated or options.hash,
          options.cache,
          options.remote,
          policies)
    except Exception as e:
      # Make sure any exception is logged.
      logging.exception(e)
      return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001306
1307
# Only run when executed as a script; the value returned by main() becomes the
# process exit code.
if __name__ == '__main__':
  sys.exit(main())