blob: 0959c861bbb06b70cbee5718d2ebc54346541df8 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
13import json
14import logging
15import optparse
16import os
17import Queue
18import re
19import shutil
20import stat
21import subprocess
22import sys
23import tempfile
24import threading
25import time
26import urllib
27
28
# Kinds of mapping accepted by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a sha-1 digest written as 40 hexadecimal characters, in any case.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size for files whose real size is not known, which is generally
# the case for .isolated files.
UNKNOWN_FILE_SIZE = None
37
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
42
43
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
47
48
def get_flavor():
  """Translates sys.platform into the flavor name used by gyp.

  Copied from gyp/pylib/gyp/common.py. Every platform that is not listed
  explicitly is reported as 'linux'.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
60
61
def os_link(source, link_name):
  """Creates the hard link |link_name| pointing at |source|.

  Adds support for os.link() on Windows, where the link is created through
  kernel32's CreateHardLinkW().

  Raises:
    OSError: the link could not be created. On Windows the exception is built
        from the last Win32 error so it carries the error code and message.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return

  # CreateHardLinkW() takes the name of the new link first, then the existing
  # file.
  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    # ctypes.WinError() returns an OSError subclass, so callers catching
    # OSError keep working while getting a meaningful error.
    raise ctypes.WinError()
70
71
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| then grants read access to everyone."""
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  current_mode = os.stat(outfile).st_mode
  os.chmod(outfile, current_mode | everyone_can_read)
78
79
def link_file(outfile, infile, action):
  """Maps |infile| at |outfile| with the strategy selected by |action|.

  |action| is one of HARDLINK, SYMLINK or COPY. A hardlink that cannot be
  created degrades to a readable copy. On Windows a SYMLINK request is served
  through the hardlink path.

  Raises ValueError on an unknown |action| and MappingError when |infile| is
  not a file or when |outfile| already is one.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    details = (outfile, os.stat(infile).st_size, os.stat(outfile).st_size)
    raise MappingError('%s already exist; insize:%d; outsize:%d' % details)

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either a HARDLINK request or a SYMLINK request on Windows, where symlinks
  # are converted to hardlinks and fail over to a copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
106
107
108def _set_write_bit(path, read_only):
109 """Sets or resets the executable bit on a file or directory."""
110 mode = os.lstat(path).st_mode
111 if read_only:
112 mode = mode & 0500
113 else:
114 mode = mode | 0200
115 if hasattr(os, 'lchmod'):
116 os.lchmod(path, mode) # pylint: disable=E1101
117 else:
118 if stat.S_ISLNK(mode):
119 # Skip symlink without lchmod() support.
120 logging.debug('Can\'t change +w bit on symlink %s' % path)
121 return
122
123 # TODO(maruel): Implement proper DACL modification on Windows.
124 os.chmod(path, mode)
125
126
def make_writable(root, read_only):
  """Toggles the write bit of everything found below |root|.

  |root| itself is not modified. In each directory the files are handled
  before the sub directories.
  """
  top = os.path.abspath(root)
  for dirpath, dirnames, filenames in os.walk(top, topdown=True):
    for name in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, name), read_only)
136
137
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows the deletion is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each failure. If every
  attempt fails the error is logged rather than raised, so this remains a
  best-effort cleanup there. On other platforms shutil.rmtree() is called once
  and its exceptions propagate.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  attempts = 3
  for i in range(attempts):
    try:
      shutil.rmtree(root)
      break
    except WindowsError:  # pylint: disable=E0602
      delay = (i+1)*2
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
  else:
    # Every attempt failed: leave a trace instead of returning silently as if
    # the tree had been deleted.
    logging.error('Failed to delete %s after %d attempts', root, attempts)
153
154
def is_same_filesystem(path1, path2):
  """Tells whether both absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks.
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path

  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    drive1 = path1[0].lower()
    drive2 = path2[0].lower()
    if drive1 != drive2:
      return False

  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
171
172
def get_free_space(path):
  """Returns the number of free bytes on the volume holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  total_free = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(total_free))
  return total_free.value
183
184
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary directory is used when it shares its file system with
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent = None
  else:
    parent = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent)
191
192
def _check_isolated_type(value, expected_type, type_name):
  """Raises ConfigError unless |value| is an instance of |expected_type|."""
  if not isinstance(value, expected_type):
    raise ConfigError('Expected %s, got %r' % (type_name, value))


def _check_isolated_sha1(value):
  """Raises ConfigError unless |value| is a string holding a sha-1 in hex."""
  # The isinstance() test keeps RE_IS_SHA1.match() from raising TypeError on
  # values that are not strings, e.g. numbers or lists.
  if not isinstance(value, basestring) or not RE_IS_SHA1.match(value):
    raise ConfigError('Expected sha-1, got %r' % value)


def _check_isolated_file_properties(properties):
  """Validates the properties attached to one entry of the 'files' dict."""
  _check_isolated_type(properties, dict, 'dict')
  for key, value in properties.iteritems():
    if key == 'link':
      _check_isolated_type(value, basestring, 'string')
    elif key in ('mode', 'size', 'timestamp'):
      # json returns a long for values too large for a native int.
      _check_isolated_type(value, (int, long), 'int')
    elif key == 'sha-1':
      _check_isolated_sha1(value)
    elif key == 'touched_only':
      _check_isolated_type(value, bool, 'bool')
    else:
      raise ConfigError('Unknown subsubkey %s' % key)
  if 'sha-1' in properties and 'link' in properties:
    raise ConfigError(
        'Did not expect both \'sha-1\' and \'link\', got: %r' % properties)


def load_isolated(content):
  """Parses |content| as a .isolated file, validates it and returns the data.

  Arguments:
  - content: json encoded text of the .isolated file.

  Returns:
    The decoded dict. Only the keys 'command', 'files', 'includes', 'os',
    'read_only' and 'relative_cwd' are accepted.

  Raises:
    ConfigError: |content| is not valid json, is not a dict, contains an
        unknown key, a value of the wrong type, or an 'os' value that differs
        from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  _check_isolated_type(data, dict, 'dict')

  for key, value in data.iteritems():
    if key == 'command':
      _check_isolated_type(value, list, 'list')
      for argument in value:
        _check_isolated_type(argument, basestring, 'string')

    elif key == 'files':
      _check_isolated_type(value, dict, 'dict')
      for filepath, properties in value.iteritems():
        _check_isolated_type(filepath, basestring, 'string')
        _check_isolated_file_properties(properties)

    elif key == 'includes':
      _check_isolated_type(value, list, 'list')
      for included_hash in value:
        _check_isolated_sha1(included_hash)

    elif key == 'read_only':
      _check_isolated_type(value, bool, 'bool')

    elif key == 'relative_cwd':
      _check_isolated_type(value, basestring, 'string')

    elif key == 'os':
      expected_flavor = get_flavor()
      if value != expected_flavor:
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (expected_flavor, value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
271
272
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  Arguments:
  - cmd: command line as a list of strings; it is not modified.

  Returns:
    A new list. A leading 'python' is replaced by sys.executable and
    sys.executable is prepended when the first item ends with '.py'. An empty
    command is returned as an empty list.
  """
  out = cmd[:]
  if not out:
    # Nothing to fix; avoids an IndexError on out[0].
    return out
  if out[0] == 'python':
    out[0] = sys.executable
  elif out[0].endswith('.py'):
    out.insert(0, sys.executable)
  return out
281
282
class WorkerThread(threading.Thread):
  """Daemon thread running queued tasks and keeping what they produce.

  Return values are accumulated in |outputs| and the sys.exc_info() of every
  failed task in |exceptions|. The thread starts itself on construction.
  """
  def __init__(self, tasks, *args, **kwargs):
    """|tasks| is the queue to read (func, args, kwargs) tuples from."""
    super(WorkerThread, self).__init__(*args, **kwargs)
    self._tasks = tasks
    self.outputs = []
    self.exceptions = []

    self.daemon = True
    self.start()

  def run(self):
    """Runs until a None task is queued."""
    # iter() with a sentinel stops as soon as None is read from the queue;
    # task_done() is not called for that terminating item.
    for task in iter(self._tasks.get, None):
      try:
        func, args, kwargs = task
        logging.debug('Runnings %s with parameters %s and %s', func, args,
                      kwargs)
        result = func(*args, **kwargs)
        self.outputs.append(result)
      except Exception as e:
        logging.error('Caught exception! %s' % e)
        self.exceptions.append(sys.exc_info())
      finally:
        self._tasks.task_done()
    # We're done.
    logging.debug('Worker thread %s exiting, no more tasks found',
                  self.name)
313
314
315class ThreadPool(object):
316 """Implements a multithreaded worker pool oriented for mapping jobs with
317 thread-local result storage.
318 """
319 QUEUE_CLASS = Queue.Queue
320
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000321 def __init__(self, num_threads, queue_size=0):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000322 logging.debug('Creating ThreadPool')
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000323 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000324 self._workers = [
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000325 WorkerThread(self.tasks, name='worker-%d' % i)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000326 for i in range(num_threads)
327 ]
328
329 def add_task(self, func, *args, **kwargs):
330 """Adds a task, a function to be executed by a worker.
331
332 The function's return value will be stored in the the worker's thread local
333 outputs list.
334 """
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000335 self.tasks.put((func, args, kwargs))
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000336
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000337 def join(self):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000338 """Extracts all the results from each threads unordered."""
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000339 self.tasks.join()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000340 out = []
341 # Look for exceptions.
342 for w in self._workers:
343 if w.exceptions:
344 raise w.exceptions[0][0], w.exceptions[0][1], w.exceptions[0][2]
345 out.extend(w.outputs)
346 w.outputs = []
347 return out
348
349 def close(self):
350 """Closes all the threads."""
351 for _ in range(len(self._workers)):
352 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000353 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000354 for t in self._workers:
355 t.join()
356
357 def __enter__(self):
358 """Enables 'with' statement."""
359 return self
360
361 def __exit__(self, exc_type, exc_value, traceback):
362 """Enables 'with' statement."""
363 self.close()
364
365
def valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  Only the size is checked for now. A |size| of UNKNOWN_FILE_SIZE is always
  accepted, without looking at the file.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  return size == os.stat(filepath).st_size
370
371
class Profiler(object):
  """Context manager logging the time spent inside its 'with' block."""
  def __init__(self, name):
    """|name| identifies the section in the log line."""
    self.start_time = None
    self.name = name

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
385
386
387class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000388 """Priority based worker queue to fetch or upload files from a
389 content-address server. Any function may be given as the fetcher/upload,
390 as long as it takes two inputs (the item contents, and their relative
391 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000392
393 Supports local file system, CIFS or http remotes.
394
395 When the priority of items is equals, works in strict FIFO mode.
396 """
397 # Initial and maximum number of worker threads.
398 INITIAL_WORKERS = 2
399 MAX_WORKERS = 16
400 # Priorities.
401 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
402 INTERNAL_PRIORITY_BITS = (1<<8) - 1
403 RETRIES = 5
404
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000405 def __init__(self, destination_root):
406 # Function to fetch a remote object or upload to a remote location..
407 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000408 # Contains tuple(priority, index, obj, destination).
409 self._queue = Queue.PriorityQueue()
410 # Contains tuple(priority, index, obj).
411 self._done = Queue.PriorityQueue()
412
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000413 # Contains generated exceptions that haven't been handled yet.
414 self._exceptions = Queue.Queue()
415
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000416 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
417 # thread-safe.
418 self._next_index = xrange(0, 1<<30).__iter__().next
419
420 # Control access to the following member.
421 self._ready_lock = threading.Lock()
422 # Number of threads in wait state.
423 self._ready = 0
424
425 # Control access to the following member.
426 self._workers_lock = threading.Lock()
427 self._workers = []
428 for _ in range(self.INITIAL_WORKERS):
429 self._add_worker()
430
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000431 def join(self):
432 """Blocks until the queue is empty."""
433 self._queue.join()
434
435 def next_exception(self):
436 """Returns the next unhandled exception, or None if there is
437 no exception."""
438 try:
439 return self._exceptions.get_nowait()
440 except Queue.Empty:
441 return None
442
443 def add_item(self, priority, obj, dest):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000444 """Retrieves an object from the remote data store.
445
446 The smaller |priority| gets fetched first.
447
448 Thread-safe.
449 """
450 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000451 self._add_to_queue(priority, obj, dest)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000452
453 def get_result(self):
454 """Returns the next file that was successfully fetched."""
455 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000456 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000457 # It's an exception.
458 raise r[2][0], r[2][1], r[2][2]
459 return r[2]
460
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000461 def _add_to_queue(self, priority, obj, dest):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000462 with self._ready_lock:
463 start_new_worker = not self._ready
464 self._queue.put((priority, self._next_index(), obj, dest))
465 if start_new_worker:
466 self._add_worker()
467
468 def _add_worker(self):
469 """Add one worker thread if there isn't too many. Thread-safe."""
470 with self._workers_lock:
471 if len(self._workers) >= self.MAX_WORKERS:
472 return False
473 worker = threading.Thread(target=self._run)
474 self._workers.append(worker)
475 worker.daemon = True
476 worker.start()
477
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000478 def _step_done(self, result):
479 """Worker helper function"""
480 self._done.put(result)
481 self._queue.task_done()
482 if result[0] == -1:
483 self._exceptions.put(sys.exc_info())
484
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000485 def _run(self):
486 """Worker thread loop."""
487 while True:
488 try:
489 with self._ready_lock:
490 self._ready += 1
491 item = self._queue.get()
492 finally:
493 with self._ready_lock:
494 self._ready -= 1
495 if not item:
496 return
497 priority, index, obj, dest = item
498 try:
499 self._do_item(obj, dest)
500 except IOError:
501 # Retry a few times, lowering the priority.
502 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000503 self._add_to_queue(priority + 1, obj, dest)
504 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000505 continue
506 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000507 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000508 except:
509 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000510 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000511 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000512 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000513
514 @staticmethod
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000515 def get_file_handler(file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000516 """Returns a object to retrieve objects from a remote."""
517 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org986c2c42012-10-04 14:39:33 +0000518 # TODO(maruel): This is particularly hackish. It shouldn't rstrip('/') in
519 # the first place or try to append '/'.
520 if not file_or_url.endswith('='):
521 file_or_url = file_or_url.rstrip('/') + '/'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000522 def download_file(item, dest):
523 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
524 # easy.
525 source = file_or_url + item
526 logging.debug('download_file(%s, %s)', source, dest)
527 urllib.urlretrieve(source, dest)
528 return download_file
529
530 def copy_file(item, dest):
531 source = os.path.join(file_or_url, item)
532 logging.debug('copy_file(%s, %s)', source, dest)
533 shutil.copy(source, dest)
534 return copy_file
535
536
class CachePolicies(object):
  """Holds the retention limits applied to a cache."""
  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
551
552
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    # A lookup map to speed up searching; maps a file name to its index in
    # self.state.
    self._lookup = {}
    self._dirty = False

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        # The 'with' statement closes the file as soon as it is parsed.
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2 for i in self.state)):
        # Discard.
        self.state = []
        self._dirty = True

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      # The same file is listed more than once.
      logging.warn('Cache state is corrupted')
      self._dirty = True
      self.state = []
    else:
      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        self._dirty = True
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
        else:
          # Insert as the oldest file. It will be deleted eventually if not
          # accessed.
          self._add(filename, False)
          added += 1
      if added:
        logging.warn('Added back %d unknown files', added)
      # What is left in |previous| was listed in the state but is not on disk
      # anymore; forget about these entries.
      self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
      ]
    self._update_lookup()

    with Profiler('SetupTrimming'):
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%4d (%7dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%4d (%7dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%4d (%7dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('%7dkb free', self._free_disk / 1024)

  def remove_file_at_index(self, index):
    """Removes the file at the given index.

    The entry is dropped from the state even when the file itself cannot be
    deleted; that failure is only logged.
    """
    self._dirty = True
    filename, size = self.state.pop(index)
    self._removed.append(size)
    # Every entry located after |index| just moved down by one slot. Rebuild
    # the lookup right away so a stale index is never used to pop an unrelated
    # entry later on.
    self._update_lookup()
    try:
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Enforces the cache policies then saves the state.

    Oldest entries are evicted first, until the total size, the number of
    items and the free disk space all satisfy self.policies. A limit of 0
    disables the corresponding check.
    """
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      # Track the total incrementally instead of summing again after each
      # eviction.
      total_size = sum(i[1] for i in self.state)
      while self.state and total_size > self.policies.max_cache_size:
        total_size -= self.state[0][1]
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.

    The fetch is only queued here; use wait_for() to wait for its completion.
    """
    assert not '/' in item
    path = self.path(item)
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(self.path(item), size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self.state.append(self.state.pop(index))
        self._dirty = True
        self._update_lookup()

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    # The 'with' statement guarantees the data is flushed and the handle
    # released before returning.
    with open(self.state_file, 'wb') as f:
      json.dump(self.state, f, separators=(',',':'))

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self.state.insert(0, (item, size))
    self._dirty = True

  def _update_lookup(self):
    """Rebuilds the file name to index map from self.state."""
    self._lookup = dict(
        (filename, index) for index, (filename, _) in enumerate(self.state))
779
780
781
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data.
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Validates |content| as a .isolated file and stores the decoded data.

    One child IsolatedFile is created per entry of 'includes'.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included_hashes = self.data.get('includes', [])
    self.children = [IsolatedFile(h) for h in included_hashes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'sha-1' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['sha-1'], properties['size'])
    self.files_fetched = True
836
837
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None
    logging.debug('Settings')

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      cache: object used to request items (retrieve()), wait for one of them
             (wait_for()) and locate it on disk (path()).
      root_isolated_hash: hash of the root .isolated file.

    On return, self.command, self.files, self.read_only and self.relative_cwd
    hold the merged values, where the first node visited that defines a value
    wins. self.relative_cwd defaults to '' and self.read_only to False.

    Raises:
      ConfigError: if a .isolated hash is referenced more than once in the
                   include tree, which covers include cycles.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the set of every hash requested so far to refuse recursive
    # includes. It must be updated for each include; otherwise a cycle between
    # two non-root files would be requested over and over.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetchable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on the garbage
      # collector.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    # Sanity check: every node of the tree must have been processed.
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
920
921
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either the SHA-1 of the .isolated file to fetch, or the path
                   to a local .isolated file, which is then added to the cache
                   under its own SHA-1.
    cache_dir: directory of the local cache; the temporary output directory is
               created through make_temp_dir() with this path.
    remote: passed to Remote() to create the object the cache downloads from.
    policies: cache policies forwarded to Cache().

  Returns:
    1 if the .isolated files define no command, otherwise the exit code of the
    command as returned by subprocess.call().

  The temporary directory is always deleted before returning.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Hash the raw bytes: text mode translates line endings on Windows,
          # which would give a digest that doesn't match the file content.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest') as _prof:
        logging.debug('Creating directories')
        # Creates the tree of directories to create: the directory of each
        # file plus all of its ancestors. Stopping at an already known
        # directory is safe since its ancestors were added along with it, and
        # guarantees termination even when dirname() reaches a fixed point
        # like '/'.
        directories = set()
        for f in settings.files:
          item = os.path.dirname(f)
          while item and item not in directories:
            directories.add(item)
            item = os.path.dirname(item)
        # Sorted order guarantees a parent is created before its children.
        for d in sorted(directories):
          os.mkdir(os.path.join(outdir, d))

        # Creates the links if necessary.
        for filepath, properties in settings.files.iteritems():
          if 'link' not in properties:
            continue
          outfile = os.path.join(outdir, filepath)
          # symlink doesn't exist on Windows. So the 'link' property should
          # never be specified for windows .isolated file.
          os.symlink(properties['link'], outfile)  # pylint: disable=E1101
          if 'mode' in properties:
            # It's not set on Windows.
            lchmod = getattr(os, 'lchmod', None)
            if lchmod:
              lchmod(outfile, properties['mode'])

        # Remaining files to be processed, grouped by hash since the same
        # content can be mapped at several paths.
        # Note that files could still not be downloaded yet here.
        remaining = dict()
        for filepath, props in settings.files.iteritems():
          if 'sha-1' in props:
            remaining.setdefault(props['sha-1'], []).append((filepath, props))

        # Do bookkeeping while files are being downloaded in the background.
        cwd = os.path.join(outdir, settings.relative_cwd)
        if not os.path.isdir(cwd):
          os.makedirs(cwd)
        cmd = settings.command[:]
        # Ensure paths are correctly separated on windows.
        cmd[0] = cmd[0].replace('/', os.path.sep)
        cmd = fix_python_path(cmd)

        # Now block on the remaining files to be downloaded and mapped.
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'mode' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['mode'])

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1008
1009
def main():
  """Parses the command line, then maps and runs the requested .isolated file.

  Exactly one of --isolated (-m/--manifest is a hidden alias kept for
  compatibility) or --hash must be specified, and --remote is mandatory.
  Positional arguments are rejected. Invalid command lines are reported
  through parser.error().

  Returns:
    The value returned by run_tha_test(), or 1 if a ConfigError or a
    MappingError was raised.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): --no-run is parsed but its value is not used in this
  # function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  group.add_option(
      '-r', '--remote', metavar='URL', help='Remote where to get the items')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache, '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # Each -v raises the verbosity by one level, capped at DEBUG.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
  logging.basicConfig(
      level=level,
      format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')

  # True when both or neither were specified.
  if bool(options.isolated) == bool(options.hash):
    parser.error('One and only one of --isolated or --hash is required.')
  if not options.remote:
    parser.error('--remote is required.')
  if args:
    parser.error('Unsupported args %s' % ' '.join(args))

  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  try:
    return run_tha_test(
        options.isolated or options.hash,
        os.path.abspath(options.cache),
        options.remote,
        policies)
  except (ConfigError, MappingError) as e:
    sys.stderr.write('%s\n' % str(e))
    return 1
1084
1085
if __name__ == '__main__':
  # Propagate the value returned by main() as the process exit code.
  exit_code = main()
  sys.exit(exit_code)