blob: 7086cd56ea621a16c60778e40593f00148ee9491 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
13import json
14import logging
15import optparse
16import os
17import Queue
18import re
19import shutil
20import stat
21import subprocess
22import sys
23import tempfile
24import threading
25import time
26import urllib
27
28
# Mapping strategies understood by link_file().
HARDLINK, SYMLINK, COPY = 1, 2, 3

# Matches a string made of 40 hexadecimal characters, i.e. a sha-1 hex digest.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')
33
34
class ConfigError(ValueError):
  """Raised when a .isolated file cannot be parsed or fails validation."""
38
39
class MappingError(OSError):
  """Raised when the file tree cannot be recreated."""
43
44
def get_flavor():
  """Returns the flavor name of the platform this script runs on.

  The mapping mirrors gyp/pylib/gyp/common.py. Any platform that is not
  explicitly listed is reported as 'linux'.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  return 'linux'
56
57
def os_link(source, link_name):
  """Creates the hard link |link_name| pointing to |source|.

  os.link() is used everywhere except on Windows, where the link is created
  through kernel32's CreateHardLinkW() instead.

  Raises:
    OSError: the hard link could not be created.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  # CreateHardLinkW() takes the new link name first, then the existing file.
  created = ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0)
  if not created:
    raise OSError()
66
67
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and makes the copy readable by everyone."""
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  os.chmod(outfile, os.stat(outfile).st_mode | everyone_can_read)
74
75
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| using the strategy selected by |action|.

  Arguments:
  - outfile: path to create; it must not already be a file.
  - infile: existing file to map.
  - action: one of HARDLINK, SYMLINK or COPY.

  Raises:
    ValueError: |action| is not a known mapping action.
    MappingError: |infile| is missing or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either HARDLINK was requested, or SYMLINK on Windows where symlinks are
  # converted to hardlinks. A failed hardlink falls back to a plain copy.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
102
103
104def _set_write_bit(path, read_only):
105 """Sets or resets the executable bit on a file or directory."""
106 mode = os.lstat(path).st_mode
107 if read_only:
108 mode = mode & 0500
109 else:
110 mode = mode | 0200
111 if hasattr(os, 'lchmod'):
112 os.lchmod(path, mode) # pylint: disable=E1101
113 else:
114 if stat.S_ISLNK(mode):
115 # Skip symlink without lchmod() support.
116 logging.debug('Can\'t change +w bit on symlink %s' % path)
117 return
118
119 # TODO(maruel): Implement proper DACL modification on Windows.
120 os.chmod(path, mode)
121
122
def make_writable(root, read_only):
  """Toggles the write bit on every file and directory found below |root|.

  When |read_only| is True the write bit is cleared, otherwise it is set. The
  tree is walked top-down and, within each directory, files are processed
  before sub-directories.
  """
  for dirpath, dirnames, filenames in os.walk(
      os.path.abspath(root), topdown=True):
    for name in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, name), read_only)
132
133
def rmtree(root):
  """Deletes the tree |root| after making its content writable.

  On Windows the deletion is attempted up to 3 times, sleeping 2, 4 then 6
  seconds after each failure, since a process outliving the test can still
  hold files open. Other platforms call shutil.rmtree() exactly once.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
      break
    except WindowsError:  # pylint: disable=E0602
      delay = (attempt + 1) * 2
      message = (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write(message + '\n')
      time.sleep(delay)
149
150
def is_same_filesystem(path1, path2):
  """Tells whether |path1| and |path2| live on the same filesystem.

  Hardlinks can only be used when this returns True. Both paths must be
  absolute and must exist, since they are passed to os.stat().
  """
  for path in (path1, path2):
    assert os.path.isabs(path), path
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    for path in (path1, path2):
      assert re.match(r'^[a-zA-Z]\:\\.*', path), path
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
167
168
def get_free_space(path):
  """Returns the number of free bytes on the volume holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  # The last argument of GetDiskFreeSpaceExW() receives the free byte count.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
179
180
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The system temporary directory is used when it shares its file system with
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
187
188
def load_isolated(content):
  """Parses the content of a .isolated file and verifies it is valid.

  Arguments:
  - content: raw JSON text of the .isolated file.

  Returns:
    The decoded dict.

  Raises:
    ConfigError: |content| is not valid JSON, is not a dict, contains an
        unknown key, a value of the wrong type, or an 'os' value that differs
        from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  # json.loads() returns a long for integers that do not fit in an int (e.g. a
  # file size of 2GiB or more where sys.maxint is 2**31-1), so both are valid.
  integer_types = (int, long)

  def is_sha1(value):
    # re.match() raises TypeError on non-string values; report those as an
    # invalid sha-1 instead of letting the TypeError escape.
    return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'link':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey in ('mode', 'size', 'timestamp'):
            if not isinstance(subsubvalue, integer_types):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'sha-1':
            if not is_sha1(subsubvalue):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 'touched_only':
            if not isinstance(subsubvalue, bool):
              raise ConfigError('Expected bool, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a file ('sha-1') or a symlink ('link'), not both.
        if 'sha-1' in subvalue and 'link' in subvalue:
          raise ConfigError(
              'Did not expect both \'sha-1\' and \'link\', got: %r' % subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
267
268
def fix_python_path(cmd):
  """Returns a copy of |cmd| that runs with the current python executable.

  A leading 'python' is replaced by sys.executable, and sys.executable is
  prepended when the first item is a '.py' script. |cmd| is left untouched.
  """
  first = cmd[0]
  if first == 'python':
    return [sys.executable] + cmd[1:]
  if first.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
277
278
class WorkerThread(threading.Thread):
  """Daemon thread that runs the tasks found in a queue until it gets None.

  The return value of each task is appended to |outputs| and the
  sys.exc_info() of each failing task is appended to |exceptions|. The thread
  starts itself as soon as it is constructed.
  """
  def __init__(self, tasks, *args, **kwargs):
    super(WorkerThread, self).__init__(*args, **kwargs)
    self._tasks = tasks
    self.outputs = []
    self.exceptions = []

    self.daemon = True
    self.start()

  def run(self):
    """Runs until a None task is queued."""
    # iter() with a sentinel keeps calling get() until it returns None. The
    # None task itself is not acknowledged with task_done().
    for task in iter(self._tasks.get, None):
      try:
        func, args, kwargs = task
        logging.debug('Runnings %s with parameters %s and %s', func, args,
                      kwargs)
        self.outputs.append(func(*args, **kwargs))
      except Exception as e:
        logging.error('Caught exception! %s' % e)
        self.exceptions.append(sys.exc_info())
      finally:
        self._tasks.task_done()
    # We're done.
    logging.debug('Worker thread %s exiting, no more tasks found', self.name)
309
310
311class ThreadPool(object):
312 """Implements a multithreaded worker pool oriented for mapping jobs with
313 thread-local result storage.
314 """
315 QUEUE_CLASS = Queue.Queue
316
317 def __init__(self, num_threads):
318 logging.debug('Creating ThreadPool')
319 self._tasks = self.QUEUE_CLASS()
320 self._workers = [
321 WorkerThread(self._tasks, name='worker-%d' % i)
322 for i in range(num_threads)
323 ]
324
325 def add_task(self, func, *args, **kwargs):
326 """Adds a task, a function to be executed by a worker.
327
328 The function's return value will be stored in the the worker's thread local
329 outputs list.
330 """
331 self._tasks.put((func, args, kwargs))
332
333 def join(self, progress):
334 """Extracts all the results from each threads unordered."""
335 if progress:
336 # Too many positional arguments for function call
337 # pylint: disable=E1121
338 self._tasks.join(progress)
339 else:
340 # pylint: disable=E1121
341 self._tasks.join()
342 out = []
343 # Look for exceptions.
344 for w in self._workers:
345 if w.exceptions:
346 raise w.exceptions[0][0], w.exceptions[0][1], w.exceptions[0][2]
347 out.extend(w.outputs)
348 w.outputs = []
349 return out
350
351 def close(self):
352 """Closes all the threads."""
353 for _ in range(len(self._workers)):
354 # Enqueueing None causes the worker to stop.
355 self._tasks.put(None)
356 for t in self._workers:
357 t.join()
358
359 def __enter__(self):
360 """Enables 'with' statement."""
361 return self
362
363 def __exit__(self, exc_type, exc_value, traceback):
364 """Enables 'with' statement."""
365 self.close()
366
367
class Profiler(object):
  """Context manager that logs the wall-clock time spent in a named section."""
  def __init__(self, name):
    self.name = name
    # Set by __enter__().
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
381
382
383class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000384 """Priority based worker queue to fetch or upload files from a
385 content-address server. Any function may be given as the fetcher/upload,
386 as long as it takes two inputs (the item contents, and their relative
387 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000388
389 Supports local file system, CIFS or http remotes.
390
391 When the priority of items is equals, works in strict FIFO mode.
392 """
393 # Initial and maximum number of worker threads.
394 INITIAL_WORKERS = 2
395 MAX_WORKERS = 16
396 # Priorities.
397 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
398 INTERNAL_PRIORITY_BITS = (1<<8) - 1
399 RETRIES = 5
400
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000401 def __init__(self, destination_root):
402 # Function to fetch a remote object or upload to a remote location..
403 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000404 # Contains tuple(priority, index, obj, destination).
405 self._queue = Queue.PriorityQueue()
406 # Contains tuple(priority, index, obj).
407 self._done = Queue.PriorityQueue()
408
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000409 # Contains generated exceptions that haven't been handled yet.
410 self._exceptions = Queue.Queue()
411
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000412 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
413 # thread-safe.
414 self._next_index = xrange(0, 1<<30).__iter__().next
415
416 # Control access to the following member.
417 self._ready_lock = threading.Lock()
418 # Number of threads in wait state.
419 self._ready = 0
420
421 # Control access to the following member.
422 self._workers_lock = threading.Lock()
423 self._workers = []
424 for _ in range(self.INITIAL_WORKERS):
425 self._add_worker()
426
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000427 def join(self):
428 """Blocks until the queue is empty."""
429 self._queue.join()
430
431 def next_exception(self):
432 """Returns the next unhandled exception, or None if there is
433 no exception."""
434 try:
435 return self._exceptions.get_nowait()
436 except Queue.Empty:
437 return None
438
439 def add_item(self, priority, obj, dest):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000440 """Retrieves an object from the remote data store.
441
442 The smaller |priority| gets fetched first.
443
444 Thread-safe.
445 """
446 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000447 self._add_to_queue(priority, obj, dest)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000448
449 def get_result(self):
450 """Returns the next file that was successfully fetched."""
451 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000452 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000453 # It's an exception.
454 raise r[2][0], r[2][1], r[2][2]
455 return r[2]
456
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000457 def _add_to_queue(self, priority, obj, dest):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000458 with self._ready_lock:
459 start_new_worker = not self._ready
460 self._queue.put((priority, self._next_index(), obj, dest))
461 if start_new_worker:
462 self._add_worker()
463
464 def _add_worker(self):
465 """Add one worker thread if there isn't too many. Thread-safe."""
466 with self._workers_lock:
467 if len(self._workers) >= self.MAX_WORKERS:
468 return False
469 worker = threading.Thread(target=self._run)
470 self._workers.append(worker)
471 worker.daemon = True
472 worker.start()
473
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000474 def _step_done(self, result):
475 """Worker helper function"""
476 self._done.put(result)
477 self._queue.task_done()
478 if result[0] == -1:
479 self._exceptions.put(sys.exc_info())
480
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000481 def _run(self):
482 """Worker thread loop."""
483 while True:
484 try:
485 with self._ready_lock:
486 self._ready += 1
487 item = self._queue.get()
488 finally:
489 with self._ready_lock:
490 self._ready -= 1
491 if not item:
492 return
493 priority, index, obj, dest = item
494 try:
495 self._do_item(obj, dest)
496 except IOError:
497 # Retry a few times, lowering the priority.
498 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000499 self._add_to_queue(priority + 1, obj, dest)
500 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000501 continue
502 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000503 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000504 except:
505 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000506 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000507 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000508 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000509
510 @staticmethod
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000511 def get_file_handler(file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000512 """Returns a object to retrieve objects from a remote."""
513 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org986c2c42012-10-04 14:39:33 +0000514 # TODO(maruel): This is particularly hackish. It shouldn't rstrip('/') in
515 # the first place or try to append '/'.
516 if not file_or_url.endswith('='):
517 file_or_url = file_or_url.rstrip('/') + '/'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000518 def download_file(item, dest):
519 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
520 # easy.
521 source = file_or_url + item
522 logging.debug('download_file(%s, %s)', source, dest)
523 urllib.urlretrieve(source, dest)
524 return download_file
525
526 def copy_file(item, dest):
527 source = os.path.join(file_or_url, item)
528 logging.debug('copy_file(%s, %s)', source, dest)
529 shutil.copy(source, dest)
530 return copy_file
531
532
class CachePolicies(object):
  """Plain holder for the retention limits applied to a Cache."""
  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
547
548
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    # A lookup map to speed up searching. Maps a file name to its index in
    # self.state.
    self._lookup = {}
    self._dirty = False

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2 for i in self.state)):
        # Discard.
        self.state = []
        self._dirty = True

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      # The same file is listed more than once.
      logging.warn('Cache state is corrupted')
      self._dirty = True
      self.state = []
    else:
      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        self._dirty = True
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
        else:
          # Insert as the oldest file. It will be deleted eventually if not
          # accessed.
          self._add(filename, False)
          added += 1
      if added:
        logging.warn('Added back %d unknown files', added)
      # What is left in |previous| is listed in the state but is not on disk
      # anymore.
      self.state = [
          (filename, size) for filename, size in self.state
          if filename not in previous
      ]
      self._update_lookup()

    with Profiler('SetupTrimming'):
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%4d (%7dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%4d (%7dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%4d (%7dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('%7dkb free', self._free_disk / 1024)

  def remove_lru_file(self):
    """Removes the least recently used file.

    The entry is dropped from the state even when deleting the file fails; the
    failure is only logged.
    """
    filename, size = self.state.pop(0)
    # Every remaining entry moved down by one position, so the index map must
    # be rebuilt; a stale index would make retrieve() move the wrong entry.
    self._update_lookup()
    self._removed.append(size)
    self._dirty = True
    try:
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    # Ensure maximum cache size.
    if self.policies.max_cache_size and self.state:
      while sum(i[1] for i in self.state) > self.policies.max_cache_size:
        self.remove_lru_file()

    # Ensure maximum number of items in the cache.
    if self.policies.max_items and self.state:
      while len(self.state) > self.policies.max_items:
        self.remove_lru_file()

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        self.policies.min_free_space and
        self.state and
        self._free_disk < self.policies.min_free_space):
      self.remove_lru_file()
      self._free_disk = get_free_space(self.cache_dir)

    self.save()

  def retrieve(self, priority, item):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    The fetch is asynchronous: use wait_for() to block until it is available.
    An item that is already cached becomes the most recently used one.
    """
    assert not '/' in item
    path = self.path(item)
    index = self._lookup.get(item)
    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path)
      self._pending_queue.add(item)
    else:
      if index != len(self.state) - 1:
        # Was already in cache. Update it's LRU value by putting it at the end.
        self.state.append(self.state.pop(index))
        self._dirty = True
        self._update_lookup()

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if not obj in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    with open(self.state_file, 'wb') as f:
      json.dump(self.state, f, separators=(',',':'))

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved.
    """
    # Flush items already present.
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self.state.insert(0, (item, size))
    self._dirty = True

  def _update_lookup(self):
    """Rebuilds the file name -> index map from self.state."""
    self._lookup = dict(
        (filename, index) for index, (filename, _) in enumerate(self.state))
760
761
762
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Validates |content|, the text of the .isolated file, and stores the
    decoded data along with one child IsolatedFile per 'includes' entry.

    Must only be called once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included_hashes = self.data.get('includes', [])
    self.children = [IsolatedFile(h) for h in included_hashes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function. Does nothing if this
    .isolated file is not loaded yet or if its files were already requested.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'sha-1' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['sha-1'])
    self.files_fetched = True
817
818
class Settings(object):
  """Results of a completely parsed .isolated file."""
  def __init__(self):
    self.command = []
    # Maps a relative file path to its properties dict.
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None
    logging.debug('Settings')

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
    - cache: object providing retrieve(), wait_for() and path().
    - root_isolated_hash: sha-1 of the main .isolated file.

    Raises:
      ConfigError: a .isolated file is invalid or includes the root one.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash)
    pending = {root_isolated_hash: self.root}
    # Keeps the list of retrieved items to refuse recursive includes.
    # NOTE(review): nothing is ever appended to this list, so only an include
    # of the root itself is refused -- confirm whether deeper cycles should be
    # detected too.
    retrieved = [root_isolated_hash]

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node defining a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file right away instead of relying on garbage collection.
      with open(cache.path(item_hash), 'r') as f:
        content = f.read()
      item.load(content)
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
901
902
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either the SHA-1 hex digest of the root .isolated file or,
        when the value does not look like a SHA-1, the path to a local
        .isolated file. In the latter case the file is hashed and added to the
        cache under its digest before being loaded.
    cache_dir: local cache directory, passed to Cache(); it is also handed to
        make_temp_dir() when creating the temporary tree.
    remote: passed as-is to Remote(), from which missing items are retrieved.
    policies: cache policies, passed as-is to Cache().

  Returns:
    The exit code of the command as returned by subprocess.call(), or 1 if no
    command was found in the loaded .isolated file(s).

  Raises:
    OSError: re-raised after printing a message when the command fails to
        start.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest. The file is read in binary mode so the digest is computed
          # on the exact bytes on disk (text mode translates newlines on
          # Windows), and the handle is closed deterministically.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest'):
        logging.debug('Creating directories')
        # Creates the tree of directories to create: the parent of every file
        # plus all of their ancestors.
        directories = set(os.path.dirname(f) for f in settings.files)
        for item in list(directories):
          while item:
            directories.add(item)
            item = os.path.dirname(item)
        # Sorted order guarantees a parent is created before its children.
        for d in sorted(directories):
          if d:
            os.mkdir(os.path.join(outdir, d))

        # Creates the links if necessary.
        for filepath, properties in settings.files.iteritems():
          if 'link' not in properties:
            continue
          outfile = os.path.join(outdir, filepath)
          # symlink doesn't exist on Windows. So the 'link' property should
          # never be specified for windows .isolated file.
          os.symlink(properties['link'], outfile)  # pylint: disable=E1101
          if 'mode' in properties:
            # It's not set on Windows.
            lchmod = getattr(os, 'lchmod', None)
            if lchmod:
              lchmod(outfile, properties['mode'])

        # Remaining files to be processed, grouped by content hash so that
        # every path sharing the same content is mapped once it is available.
        # Note that files could still not be downloaded yet here.
        remaining = dict()
        for filepath, props in settings.files.iteritems():
          if 'sha-1' in props:
            remaining.setdefault(props['sha-1'], []).append((filepath, props))

        # Do bookkeeping while files are being downloaded in the background.
        cwd = os.path.join(outdir, settings.relative_cwd)
        if not os.path.isdir(cwd):
          os.makedirs(cwd)
        cmd = settings.command[:]
        # Ensure paths are correctly separated on windows.
        cmd[0] = cmd[0].replace('/', os.path.sep)
        cmd = fix_python_path(cmd)

        # Now block on the remaining files to be downloaded and mapped.
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'mode' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['mode'])

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)
      try:
        with Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      # Always remove the temporary tree, whatever the outcome.
      rmtree(outdir)
989
990
def main():
  """Parses the command line, then maps and runs the requested .isolated file.

  Exactly one of --isolated or --hash must be given, --remote is mandatory and
  no positional argument is accepted. Violations are reported through
  parser.error().

  Returns:
    The value returned by run_tha_test(), or 1 when a ConfigError or a
    MappingError is raised, in which case the error is printed on stderr.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): --no-run is accepted but options.no_run is never read in this
  # function, so it has no effect here -- confirm the intended behavior.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  group.add_option(
      '-r', '--remote', metavar='URL', help='Remote where to get the items')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache, '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # -v may be repeated; anything past two occurrences is clamped to DEBUG.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
  logging.basicConfig(
      level=level,
      format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')

  # Both set or both unset is an error: exactly one source is required.
  if bool(options.isolated) == bool(options.hash):
    parser.error('One and only one of --isolated or --hash is required.')
  if not options.remote:
    parser.error('--remote is required.')
  if args:
    parser.error('Unsupported args %s' % ' '.join(args))

  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  try:
    return run_tha_test(
        options.isolated or options.hash,
        os.path.abspath(options.cache),
        options.remote,
        policies)
  except (ConfigError, MappingError) as e:
    sys.stderr.write('%s\n' % str(e))
    return 1
1065
1066
if __name__ == '__main__':
  # Script entry point: main()'s return value becomes the process exit status.
  raise SystemExit(main())