blob: 866e3060665b5f17fb0ae4c8a5f8c6f0b4c0d8ac [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Reads a manifest, creates a tree of hardlinks and runs the test.
7
8Keeps a local cache.
9"""
10
11# This is just a copy of the file found in the chromium depot (tools/isolate),
12# it is needed here so that the swarm trigger builder can access it.
13# This should be removed from this repo once the test bootstraping is updated.
14
15import ctypes
16import hashlib
17import json
18import logging
19import optparse
20import os
21import Queue
22import re
23import shutil
24import stat
25import subprocess
26import sys
27import tempfile
28import threading
29import time
30import urllib
31
32
# Types of action accepted by recreate_tree().
HARDLINK, SYMLINK, COPY = range(1, 4)

# Matches a 40 character hexadecimal sha-1 digest, in either case.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')
37
38
class ConfigError(ValueError):
  """Raised when a manifest cannot be parsed or fails validation."""
42
43
class MappingError(OSError):
  """Raised when the dependency tree cannot be recreated on disk."""
47
48
def get_flavor():
  """Returns the system default flavor. Copied from gyp/pylib/gyp/common.py."""
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  # Anything unrecognized is treated as linux.
  return 'linux'
60
61
def os_link(source, link_name):
  """Add support for os.link() on Windows."""
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  # os.link() is not available on Windows; call the win32 API directly.
  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    raise OSError()
70
71
def readable_copy(outfile, infile):
  """Makes a copy of the file that is readable by everyone."""
  shutil.copy(infile, outfile)
  # Add read permission for user, group and others on top of whatever mode
  # the copy inherited.
  mode = os.stat(outfile).st_mode
  os.chmod(outfile, mode | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
78
79
def link_file(outfile, infile, action):
  """Links a file. The type of link depends on |action|."""
  logging.debug('Mapping %s to %s' % (infile, outfile))
  # Validate inputs before touching the file system.
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, os.stat(infile).st_size, os.stat(outfile).st_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)
    return

  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
106
107
def _set_write_bit(path, read_only):
  """Sets or resets the write bit on a file or directory.

  If |read_only|, strips the owner write bit and all group/other bits
  (mode & 0500); otherwise adds the owner write bit (mode | 0200).
  """
  mode = os.lstat(path).st_mode
  if read_only:
    mode = mode & 0500
  else:
    mode = mode | 0200
  if hasattr(os, 'lchmod'):
    # lchmod() changes the link itself rather than its target.
    os.lchmod(path, mode)  # pylint: disable=E1101
  else:
    if stat.S_ISLNK(mode):
      # Skip symlink without lchmod() support.
      logging.debug('Can\'t change +w bit on symlink %s' % path)
      return

    # TODO(maruel): Implement proper DACL modification on Windows.
    os.chmod(path, mode)
125
126
def make_writable(root, read_only):
  """Toggle the writable bit on a directory tree."""
  root = os.path.abspath(root)
  for dirpath, dirnames, filenames in os.walk(root, topdown=True):
    # Process files first, then directories, mirroring the original order.
    for entry in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, entry), read_only)
136
137
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows."""
  # Files must be writable for rmtree() to be able to delete them.
  make_writable(root, False)
  if sys.platform == 'win32':
    # On Windows a straggler child process (or an antivirus) can keep a file
    # open and make the deletion fail; retry up to 3 times with an increasing
    # delay.
    for i in range(3):
      try:
        shutil.rmtree(root)
        break
      except WindowsError:  # pylint: disable=E0602
        delay = (i+1)*2
        print >> sys.stderr, (
            'The test has subprocess outliving it. Sleep %d seconds.' % delay)
        time.sleep(delay)
  else:
    shutil.rmtree(root)
153
154
def is_same_filesystem(path1, path2):
  """Checks whether two absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks.
  """
  for p in (path1, path2):
    assert os.path.isabs(p), p
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for p in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', p), p
    if path1[0].lower() != path2[0].lower():
      return False
  return os.stat(path1).st_dev == os.stat(path2).st_dev
171
172
def get_free_space(path):
  """Returns the number of free bytes."""
  if sys.platform == 'win32':
    # os.statvfs() does not exist on Windows; use the win32 API instead.
    free_bytes = ctypes.c_ulonglong(0)
    ctypes.windll.kernel32.GetDiskFreeSpaceExW(
        ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
    return free_bytes.value
  stats = os.statvfs(path)
  return stats.f_bfree * stats.f_frsize
182
183
def make_temp_dir(prefix, root_dir):
  """Returns a temporary directory on the same file system as root_dir."""
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # The system default temp location will do; hardlinks will work from it.
    base_temp_dir = None
  else:
    base_temp_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=base_temp_dir)
190
191
def load_manifest(content):
  """Verifies the manifest is valid and loads this object with the json data.

  |content| is a JSON string. The decoded dict may only contain the keys
  'command', 'files', 'includes', 'read_only', 'relative_cwd' and 'os';
  anything else is rejected.

  Returns:
    The decoded manifest as a dict.

  Raises:
    ConfigError if |content| is not valid JSON or fails schema validation.
  """
  try:
    data = json.loads(content)
  except ValueError:
    # Only echo the first 100 characters to keep the message readable.
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      # The command to run, as a list of string arguments.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      # Maps a relative file path to a dict of per-file properties.
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'link':
            # Symlink target path.
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'mode':
            # POSIX file mode.
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'sha-1':
            # Content hash used to fetch the file from the remote.
            if not RE_IS_SHA1.match(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 'size':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'timestamp':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'touched_only':
            if not isinstance(subsubvalue, bool):
              raise ConfigError('Expected bool, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # A file is either a symlink or fetched by content hash, never both.
        if bool('sha-1' in subvalue) and bool('link' in subvalue):
          raise ConfigError(
              'Did not expect both \'sha-1\' and \'link\', got: %r' % subvalue)

    elif key == 'includes':
      # Sha-1s of other manifests to load recursively.
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not RE_IS_SHA1.match(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      # The manifest must target the flavor of the current platform.
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
269
270
def fix_python_path(cmd):
  """Returns a copy of |cmd| adjusted to call the right python executable.

  A leading 'python' is replaced by sys.executable; a leading *.py script gets
  sys.executable prepended. Anything else is returned unmodified (as a copy).
  """
  head, tail = cmd[0], cmd[1:]
  if head == 'python':
    return [sys.executable] + tail
  if head.endswith('.py'):
    return [sys.executable, head] + tail
  return cmd[:]
279
280
class Profiler(object):
  """Context manager that logs the wall-clock duration of its body.

  Usage: with Profiler('SectionName'): ...
  """

  def __init__(self, name):
    self.name = name
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
294
295
296class Remote(object):
297 """Priority based worker queue to fetch files from a content-address server.
298
299 Supports local file system, CIFS or http remotes.
300
301 When the priority of items is equals, works in strict FIFO mode.
302 """
303 # Initial and maximum number of worker threads.
304 INITIAL_WORKERS = 2
305 MAX_WORKERS = 16
306 # Priorities.
307 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
308 INTERNAL_PRIORITY_BITS = (1<<8) - 1
309 RETRIES = 5
310
311 def __init__(self, file_or_url):
312 # Function to fetch a remote object.
313 self._do_item = self._get_remote_fetcher(file_or_url)
314 # Contains tuple(priority, index, obj, destination).
315 self._queue = Queue.PriorityQueue()
316 # Contains tuple(priority, index, obj).
317 self._done = Queue.PriorityQueue()
318
319 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
320 # thread-safe.
321 self._next_index = xrange(0, 1<<30).__iter__().next
322
323 # Control access to the following member.
324 self._ready_lock = threading.Lock()
325 # Number of threads in wait state.
326 self._ready = 0
327
328 # Control access to the following member.
329 self._workers_lock = threading.Lock()
330 self._workers = []
331 for _ in range(self.INITIAL_WORKERS):
332 self._add_worker()
333
334 def fetch_item(self, priority, obj, dest):
335 """Retrieves an object from the remote data store.
336
337 The smaller |priority| gets fetched first.
338
339 Thread-safe.
340 """
341 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
342 self._fetch(priority, obj, dest)
343
344 def get_result(self):
345 """Returns the next file that was successfully fetched."""
346 r = self._done.get()
347 if r[0] == '-1':
348 # It's an exception.
349 raise r[2][0], r[2][1], r[2][2]
350 return r[2]
351
352 def _fetch(self, priority, obj, dest):
353 with self._ready_lock:
354 start_new_worker = not self._ready
355 self._queue.put((priority, self._next_index(), obj, dest))
356 if start_new_worker:
357 self._add_worker()
358
359 def _add_worker(self):
360 """Add one worker thread if there isn't too many. Thread-safe."""
361 with self._workers_lock:
362 if len(self._workers) >= self.MAX_WORKERS:
363 return False
364 worker = threading.Thread(target=self._run)
365 self._workers.append(worker)
366 worker.daemon = True
367 worker.start()
368
369 def _run(self):
370 """Worker thread loop."""
371 while True:
372 try:
373 with self._ready_lock:
374 self._ready += 1
375 item = self._queue.get()
376 finally:
377 with self._ready_lock:
378 self._ready -= 1
379 if not item:
380 return
381 priority, index, obj, dest = item
382 try:
383 self._do_item(obj, dest)
384 except IOError:
385 # Retry a few times, lowering the priority.
386 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
387 self._fetch(priority + 1, obj, dest)
388 continue
389 # Transfers the exception back. It has maximum priority.
390 self._done.put((-1, 0, sys.exc_info()))
391 except:
392 # Transfers the exception back. It has maximum priority.
393 self._done.put((-1, 0, sys.exc_info()))
394 else:
395 self._done.put((priority, index, obj))
396
397 @staticmethod
398 def _get_remote_fetcher(file_or_url):
399 """Returns a object to retrieve objects from a remote."""
400 if re.match(r'^https?://.+$', file_or_url):
401 if not file_or_url.endswith('='):
402 file_or_url = file_or_url.rstrip('/') + '/'
403 def download_file(item, dest):
404 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
405 # easy.
406 source = file_or_url + item
407 logging.debug('download_file(%s, %s)', source, dest)
408 urllib.urlretrieve(source, dest)
409 return download_file
410
411 def copy_file(item, dest):
412 source = os.path.join(file_or_url, item)
413 logging.debug('copy_file(%s, %s)', source, dest)
414 shutil.copy(source, dest)
415 return copy_file
416
417
class CachePolicies(object):
  """Bundle of cache retention limits.

  Attributes:
  - max_cache_size: Trim if the cache gets larger than this value. If 0, the
    cache is effectively a leak.
  - min_free_space: Trim if disk free space becomes lower than this value. If
    0, it unconditionally fill the disk.
  - max_items: Maximum number of items to keep in the cache. If 0, do not
    enforce a limit.
  """
  def __init__(self, max_cache_size, min_free_space, max_items):
    self.max_cache_size = max_cache_size
    self.min_free_space = min_free_space
    self.max_items = max_items
432
433
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.

  The cache directory holds one file per item, named by its sha-1, plus
  STATE_FILE which persists the LRU ordering across runs.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    # A lookup map to speed up searching.
    self._lookup = {}
    # True when self.state diverges from what was loaded from self.state_file.
    self._dirty = False

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        self.state = json.load(open(self.state_file, 'r'))
      except (IOError, ValueError), e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
      # Sanity check: the state must be a list of (filename, size) pairs.
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2 for i in self.state)):
        # Discard.
        self.state = []
        self._dirty = True

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      # Duplicate entries mean the state file cannot be trusted at all.
      logging.warn('Cache state is corrupted')
      self._dirty = True
      self.state = []
    else:
      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        self._dirty = True
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
        else:
          # Insert as the oldest file. It will be deleted eventually if not
          # accessed.
          self._add(filename, False)
          added += 1
      if added:
        logging.warn('Added back %d unknown files', added)
      # Whatever is left in |previous| is listed in the state but missing on
      # disk; drop those entries.
      self.state = [
        (filename, size) for filename, size in self.state
        if filename not in previous
      ]
      self._update_lookup()

    with Profiler('SetupTrimming'):
      self.trim()

  def __enter__(self):
    # Supports 'with Cache(...) as cache:'; trimming happens on exit.
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    # Trim back to the policies' limits and log usage statistics.
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%4d (%7dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%4d (%7dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%4d (%7dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('%7dkb free', self._free_disk / 1024)

  def remove_lru_file(self):
    """Removes the last recently used file."""
    try:
      # self.state[0] is always the least recently used item.
      filename, size = self.state.pop(0)
      del self._lookup[filename]
      self._removed.append(size)
      os.remove(self.path(filename))
      self._dirty = True
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    |item| is the sha-1 of the content. The fetch is asynchronous; completion
    is observed through wait_for().
    """
    assert not '/' in item
    path = self.path(item)
    index = self._lookup.get(item)
    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.fetch_item(priority, item, path)
      self._pending_queue.add(item)
    else:
      if index != len(self.state) - 1:
        # Was already in cache. Update it's LRU value by putting it at the end.
        self.state.append(self.state.pop(index))
        self._dirty = True
        self._update_lookup()

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    json.dump(self.state, open(self.state_file, 'wb'), separators=(',',':'))

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self.state.insert(0, (item, size))
    self._dirty = True

  def _update_lookup(self):
    # Rebuilds the filename -> index map from scratch after reordering.
    self._lookup = dict(
        (filename, index) for index, (filename, _) in enumerate(self.state))
645
646
647
class Manifest(object):
  """Represents a single parsed manifest, e.g. a .results file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('Manifest(%s)' % obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # manifest and all the manifest recursively included by it with 'includes'
    # key. The order of each manifest sha-1 in 'includes' is important, as the
    # later ones are not processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Raw data.
    self.data = {}
    # A Manifest instance, one per object in self.includes.
    self.children = []
    # Set once the manifest is loaded.
    self._manifest_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as a manifest and populates this instance.

    May only be called once.
    """
    logging.debug('Manifest.load(%s)' % self.obj_hash)
    assert not self._manifest_parsed
    self.data = load_manifest(content)
    self.children = [Manifest(sha) for sha in self.data.get('includes', [])]
    self._manifest_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this manifest not present in files dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if not self._manifest_parsed or self.files_fetched:
      # Nothing to do: not loaded yet, or already processed.
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root manifest has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'sha-1' in properties:
        # Preemptively request files.
        logging.debug('fetching %s' % filepath)
        cache.retrieve(Remote.MED, properties['sha-1'])
    self.files_fetched = True
700
701
class Settings(object):
  """Results of a completely parsed manifest."""
  def __init__(self):
    # Command line to run, taken from the first manifest that defines one.
    self.command = []
    # Maps relative file path -> properties dict; the root manifest wins.
    self.files = {}
    # Tri-state: None until some manifest specifies it.
    self.read_only = None
    self.relative_cwd = None
    # The main manifest.
    self.root = None
    logging.debug('Settings')

  def load(self, cache, root_manifest_hash):
    """Loads the manifest and all the included manifests asynchronously.

    It enables support for included manifest. They are processed in strict
    order but fetched asynchronously from the cache. This is important so that
    a file in an included manifest that is overridden by an embedding manifest
    is not fetched neededlessly. The includes are fetched in one pass and the
    files are fetched as soon as all the manifests on the left-side of the
    tree were fetched.

    The prioritization is very important here for nested manifests. 'includes'
    have the highest priority and the algorithm is optimized for both deep and
    wide manifests. A deep one is a long link of manifest referenced one at a
    time by one item in 'includes'. A wide one has a large number of
    'includes' in a single manifest. 'left' is defined as an included manifest
    earlier in the 'includes' list. So the order of the elements in 'includes'
    is important.
    """
    self.root = Manifest(root_manifest_hash)
    cache.retrieve(Remote.HIGH, root_manifest_hash)
    pending = {root_manifest_hash: self.root}
    # Keeps the list of retrieved items to refuse recursive includes.
    retrieved = [root_manifest_hash]

    def update_self(node):
      # Merges |node|'s files and scalar properties into self. The first
      # manifest (in traversal order) to define a property wins.
      node.fetch_files(cache, self.files)
      # Grabs properties.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      # Walks the include tree, marking at most one new child fetchable per
      # pass so includes are processed strictly left-to-right.
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      item.load(open(cache.path(item_hash), 'r').read())
      if item_hash == root_manifest_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('Manifest %s is retrieved recursively' % h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)
    def check(n):
      # Sanity check: every node in the tree fetched its files.
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
784
785
def run_tha_test(manifest_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
  - manifest_hash: sha-1 of the root manifest, or a path to a local manifest
    file (detected by not matching RE_IS_SHA1).
  - cache_dir: directory used by Cache to store fetched items.
  - remote: file path or url passed to Remote.
  - policies: CachePolicies controlling cache trimming.

  Returns the exit code of the test command (1 if no command is specified).
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    # The tree is built on the same filesystem as the cache so hardlinks work.
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetManifests') as _prof:
        # Optionally support local files.
        if not RE_IS_SHA1.match(manifest_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          h = hashlib.sha1(open(manifest_hash, 'r').read()).hexdigest()
          cache.add(manifest_hash, h)
          manifest_hash = h
        settings.load(cache, manifest_hash)

      if not settings.command:
        print >> sys.stderr, 'No command to run'
        return 1

      with Profiler('GetRest') as _prof:
        logging.debug('Creating directories')
        # Creates the tree of directories to create.
        directories = set(os.path.dirname(f) for f in settings.files)
        # Add every intermediate parent directory as well.
        for item in list(directories):
          while item:
            directories.add(item)
            item = os.path.dirname(item)
        # sorted() guarantees parents are created before children.
        for d in sorted(directories):
          if d:
            os.mkdir(os.path.join(outdir, d))

        # Creates the links if necessary.
        for filepath, properties in settings.files.iteritems():
          if 'link' not in properties:
            continue
          outfile = os.path.join(outdir, filepath)
          os.symlink(properties['link'], outfile)
          if 'mode' in properties:
            # It's not set on Windows.
            os.chmod(outfile, properties['mode'])

        # Remaining files to be processed.
        # Note that files could still be not be downloaded yet here.
        # Maps sha-1 -> list of (filepath, properties) sharing that content.
        remaining = dict()
        for filepath, props in settings.files.iteritems():
          if 'sha-1' in props:
            remaining.setdefault(props['sha-1'], []).append((filepath, props))

        # Do bookkeeping while files are being downloaded in the background.
        cwd = os.path.join(outdir, settings.relative_cwd)
        if not os.path.isdir(cwd):
          os.makedirs(cwd)
        cmd = settings.command[:]
        # Ensure paths are correctly separated on windows.
        cmd[0] = cmd[0].replace('/', os.path.sep)
        cmd = fix_python_path(cmd)

        # Now block on the remaining files to be downloaded and mapped.
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'mode' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['mode'])

      if settings.read_only:
        # Despite its name, make_writable(x, True) strips the write bits.
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))
      try:
        with Profiler('RunTest') as _prof:
          return subprocess.call(cmd, cwd=cwd)
      except OSError:
        print >> sys.stderr, 'Failed to run %s; cwd=%s' % (cmd, cwd)
        raise
    finally:
      # Always clean up the mapped tree, even on failure.
      rmtree(outdir)
868
869
def main():
  """Command line entry point. Returns the exit code of the wrapped test."""
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-m', '--manifest',
      metavar='FILE',
      help='File/url describing what to map or run')
  group.add_option(
      '-H', '--hash',
      help='Hash of the manifest to grab from the hash table')
  parser.add_option_group(group)

  # NOTE(review): --remote is added to the 'Data source' group *after* the
  # group was registered with the parser. optparse still registers the option
  # (the group keeps a reference to the parser), but this line probably
  # belongs above parser.add_option_group() — confirm before moving.
  group.add_option(
      '-r', '--remote', metavar='URL', help='Remote where to get the items')
  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # -v maps to INFO, -vv (or more) to DEBUG; default is ERROR only.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
  logging.basicConfig(
      level=level,
      format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')

  # Exactly one of --manifest / --hash must be set.
  if bool(options.manifest) == bool(options.hash):
    parser.error('One and only one of --manifest or --hash is required.')
  if not options.remote:
    parser.error('--remote is required.')
  if args:
    parser.error('Unsupported args %s' % ' '.join(args))

  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  try:
    return run_tha_test(
        options.manifest or options.hash,
        os.path.abspath(options.cache),
        options.remote,
        policies)
  except (ConfigError, MappingError), e:
    # Expected failure modes: print the message without a traceback.
    print >> sys.stderr, str(e)
    return 1
941
942
if __name__ == '__main__':
  # Propagate the wrapped test's exit code to the caller.
  sys.exit(main())