blob: 4944e522073e89a4a5bcf367168f395c413de622 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
13import json
14import logging
15import optparse
16import os
17import Queue
18import re
19import shutil
20import stat
21import subprocess
22import sys
23import tempfile
24import threading
25import time
26import urllib
27
28
# Link strategies understood by link_file().
HARDLINK = 1
SYMLINK = 2
COPY = 3

# Matches a hex encoded sha-1 digest, i.e. exactly 40 hexadecimal characters.
RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')

# Placeholder size used when the real size of a file is not known, which is
# generally the case for .isolated files.
UNKNOWN_FILE_SIZE = None
37
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000038
class ConfigError(ValueError):
  """Raised when a .isolated file could not be loaded."""
42
43
class MappingError(OSError):
  """Raised when the tree of files could not be recreated."""
47
48
def get_flavor():
  """Translates sys.platform into a flavor name, defaulting to 'linux'.

  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  # Anything unrecognized is assumed to behave like linux.
  return 'linux'
60
61
def os_link(source, link_name):
  """Creates a hard link named |link_name| that points to |source|.

  Adds support for os.link() on Windows, where the link is created through the
  CreateHardLinkW() Win32 API instead.

  Raises:
    OSError: the hard link could not be created.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  # CreateHardLinkW() returns zero on failure. Name both paths in the error so
  # the failure can be diagnosed from the logs.
  if not ctypes.windll.kernel32.CreateHardLinkW(
      unicode(link_name), unicode(source), 0):
    raise OSError(
        'CreateHardLinkW() failed to link %s to %s' % (link_name, source))
70
71
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and grants read access to everyone."""
  shutil.copy(infile, outfile)
  mode = os.stat(outfile).st_mode
  # Add the read bit for user, group and others; keep every other bit as is.
  for read_bit in (stat.S_IRUSR, stat.S_IRGRP, stat.S_IROTH):
    mode |= read_bit
  os.chmod(outfile, mode)
78
79
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| using the strategy selected by |action|.

  |action| must be one of HARDLINK, SYMLINK or COPY. A hardlink that cannot be
  created falls back to a copy. On Windows, SYMLINK is handled as a hardlink.

  Raises:
    ValueError: |action| is not a known action.
    MappingError: |infile| is not a file or |outfile| already exists.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  # Either HARDLINK, or SYMLINK on Windows where symlinks are converted to
  # hardlinks.
  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
106
107
108def _set_write_bit(path, read_only):
109 """Sets or resets the executable bit on a file or directory."""
110 mode = os.lstat(path).st_mode
111 if read_only:
112 mode = mode & 0500
113 else:
114 mode = mode | 0200
115 if hasattr(os, 'lchmod'):
116 os.lchmod(path, mode) # pylint: disable=E1101
117 else:
118 if stat.S_ISLNK(mode):
119 # Skip symlink without lchmod() support.
120 logging.debug('Can\'t change +w bit on symlink %s' % path)
121 return
122
123 # TODO(maruel): Implement proper DACL modification on Windows.
124 os.chmod(path, mode)
125
126
def make_writable(root, read_only):
  """Toggles the write bit of everything found below |root|.

  The entries are visited top-down with os.walk(). |root| itself is never
  passed to _set_write_bit(), only the files and directories it contains.
  """
  top = os.path.abspath(root)
  for dirpath, dirnames, filenames in os.walk(top, topdown=True):
    # Files of a directory are processed before its sub-directories.
    for name in filenames + dirnames:
      _set_write_bit(os.path.join(dirpath, name), read_only)
136
137
def rmtree(root):
  """Wrapper around shutil.rmtree() to retry automatically on Windows.

  The tree is made writable first. On Windows the deletion is attempted up to
  three times, sleeping 2, 4 then 6 seconds after each failure.

  NOTE(review): if the three attempts fail on Windows, no exception is raised
  and the tree is left in place.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = (attempt + 1) * 2
      sys.stderr.write(
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      sys.stderr.write('\n')
      time.sleep(delay)
    else:
      break
153
154
def is_same_filesystem(path1, path2):
  """Tells whether |path1| and |path2| live on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute and must exist, since they are passed to os.stat().
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_path = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_path, path1), path1
    assert re.match(drive_path, path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
171
172
def get_free_space(path):
  """Returns the number of free bytes on the file system holding |path|."""
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  # The fourth argument of GetDiskFreeSpaceExW() receives the result.
  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
183
184
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary directory is used when it shares its file system with
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    # None lets tempfile pick its default location.
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
191
192
def _is_sha1(value):
  """Returns True if |value| is a string holding a hex encoded sha-1."""
  # The type is checked first since matching a regexp against a non-string,
  # e.g. a number found in a malformed file, raises TypeError.
  return isinstance(value, basestring) and bool(RE_IS_SHA1.match(value))


def _validate_isolated_file_entry(properties):
  """Validates the properties of a single entry of the 'files' section.

  Raises:
    ConfigError: a property is unknown, has the wrong type, or both 'sha-1'
                 and 'link' are present.
  """
  # json.loads() returns a long for integers that do not fit in an int, e.g.
  # the size of a large file when sys.maxint is 2^31-1.
  integer_types = (int, long)  # pylint: disable=E0602
  for key, value in properties.iteritems():
    if key == 'link':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)
    elif key in ('mode', 'size', 'timestamp'):
      if not isinstance(value, integer_types):
        raise ConfigError('Expected int, got %r' % value)
    elif key == 'sha-1':
      if not _is_sha1(value):
        raise ConfigError('Expected sha-1, got %r' % value)
    elif key == 'touched_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)
    else:
      raise ConfigError('Unknown subsubkey %s' % key)
  if 'sha-1' in properties and 'link' in properties:
    raise ConfigError(
        'Did not expect both \'sha-1\' and \'link\', got: %r' % properties)


def load_isolated(content):
  """Verifies the .isolated file is valid and returns its decoded json data.

  Arguments:
  - content: the raw json text of a .isolated file.

  Returns:
    The decoded dict. Only the keys 'command', 'files', 'includes', 'os',
    'read_only' and 'relative_cwd' are accepted.

  Raises:
    ConfigError: the content is not valid json, is not a dict, contains an
                 unknown key or a value of an unexpected type, or its 'os'
                 value differs from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        _validate_isolated_file_entry(subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      for subvalue in value:
        if not _is_sha1(subvalue):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      # A .isolated file is only usable on the OS it was generated for.
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
271
272
def fix_python_path(cmd):
  """Returns the fixed command line to call the right python executable.

  Arguments:
  - cmd: sequence of strings; it is never modified.

  Returns:
    A new list. A leading 'python' is replaced by sys.executable, and
    sys.executable is prepended when the first item ends with '.py'. An empty
    command is returned as an empty list.
  """
  out = list(cmd)
  if not out:
    # Nothing to fix; avoids failing on out[0] below.
    return out
  if out[0] == 'python':
    out[0] = sys.executable
  elif out[0].endswith('.py'):
    out.insert(0, sys.executable)
  return out
281
282
class WorkerThread(threading.Thread):
  """Daemon thread executing tasks read from a queue.

  The return value of each task is appended to self.outputs and the
  sys.exc_info() of each failed task is appended to self.exceptions, so the
  results are kept per thread.
  """
  def __init__(self, tasks, *args, **kwargs):
    """Starts the thread immediately.

    Arguments:
    - tasks: queue of (func, args, kwargs) tuples; None tells the thread to
             stop.
    - args, kwargs: forwarded to threading.Thread.
    """
    super(WorkerThread, self).__init__(*args, **kwargs)
    self._tasks = tasks
    self.outputs = []
    self.exceptions = []

    self.daemon = True
    self.start()

  def run(self):
    """Runs until a None task is queued."""
    while True:
      task = self._tasks.get()
      if task is None:
        # We're done. Account for the sentinel too, otherwise a later join()
        # on the queue would wait forever for it.
        self._tasks.task_done()
        return
      try:
        func, args, kwargs = task
        self.outputs.append(func(*args, **kwargs))
      except Exception as e:
        logging.error('Caught exception! %s' % e)
        self.exceptions.append(sys.exc_info())
      finally:
        self._tasks.task_done()
309
310
311class ThreadPool(object):
312 """Implements a multithreaded worker pool oriented for mapping jobs with
313 thread-local result storage.
314 """
315 QUEUE_CLASS = Queue.Queue
316
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000317 def __init__(self, num_threads, queue_size=0):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000318 logging.debug('Creating ThreadPool')
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000319 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000320 self._workers = [
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000321 WorkerThread(self.tasks, name='worker-%d' % i)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000322 for i in range(num_threads)
323 ]
324
325 def add_task(self, func, *args, **kwargs):
326 """Adds a task, a function to be executed by a worker.
327
328 The function's return value will be stored in the the worker's thread local
329 outputs list.
330 """
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000331 self.tasks.put((func, args, kwargs))
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000332
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000333 def join(self):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000334 """Extracts all the results from each threads unordered."""
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000335 self.tasks.join()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000336 out = []
337 # Look for exceptions.
338 for w in self._workers:
339 if w.exceptions:
340 raise w.exceptions[0][0], w.exceptions[0][1], w.exceptions[0][2]
341 out.extend(w.outputs)
342 w.outputs = []
343 return out
344
345 def close(self):
346 """Closes all the threads."""
347 for _ in range(len(self._workers)):
348 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000349 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000350 for t in self._workers:
351 t.join()
352
353 def __enter__(self):
354 """Enables 'with' statement."""
355 return self
356
357 def __exit__(self, exc_type, exc_value, traceback):
358 """Enables 'with' statement."""
359 self.close()
360
361
def valid_file(filepath, size):
  """Tells whether the file at |filepath| looks valid.

  Currently only the size is verified. A |size| of UNKNOWN_FILE_SIZE is always
  accepted, without touching the file.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  return os.stat(filepath).st_size == size
366
367
class Profiler(object):
  """Context manager logging the time spent inside its 'with' block."""
  def __init__(self, name):
    # |name| identifies the section in the log line.
    self.name = name
    # Set by __enter__().
    self.start_time = None

  def __enter__(self):
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
381
382
383class Remote(object):
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000384 """Priority based worker queue to fetch or upload files from a
385 content-address server. Any function may be given as the fetcher/upload,
386 as long as it takes two inputs (the item contents, and their relative
387 destination).
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000388
389 Supports local file system, CIFS or http remotes.
390
391 When the priority of items is equals, works in strict FIFO mode.
392 """
393 # Initial and maximum number of worker threads.
394 INITIAL_WORKERS = 2
395 MAX_WORKERS = 16
396 # Priorities.
397 LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
398 INTERNAL_PRIORITY_BITS = (1<<8) - 1
399 RETRIES = 5
400
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000401 def __init__(self, destination_root):
402 # Function to fetch a remote object or upload to a remote location..
403 self._do_item = self.get_file_handler(destination_root)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000404 # Contains tuple(priority, index, obj, destination).
405 self._queue = Queue.PriorityQueue()
406 # Contains tuple(priority, index, obj).
407 self._done = Queue.PriorityQueue()
408
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000409 # Contains generated exceptions that haven't been handled yet.
410 self._exceptions = Queue.Queue()
411
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000412 # To keep FIFO ordering in self._queue. It is assumed xrange's iterator is
413 # thread-safe.
414 self._next_index = xrange(0, 1<<30).__iter__().next
415
416 # Control access to the following member.
417 self._ready_lock = threading.Lock()
418 # Number of threads in wait state.
419 self._ready = 0
420
421 # Control access to the following member.
422 self._workers_lock = threading.Lock()
423 self._workers = []
424 for _ in range(self.INITIAL_WORKERS):
425 self._add_worker()
426
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000427 def join(self):
428 """Blocks until the queue is empty."""
429 self._queue.join()
430
431 def next_exception(self):
432 """Returns the next unhandled exception, or None if there is
433 no exception."""
434 try:
435 return self._exceptions.get_nowait()
436 except Queue.Empty:
437 return None
438
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000439 def add_item(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000440 """Retrieves an object from the remote data store.
441
442 The smaller |priority| gets fetched first.
443
444 Thread-safe.
445 """
446 assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000447 self._add_to_queue(priority, obj, dest, size)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000448
449 def get_result(self):
450 """Returns the next file that was successfully fetched."""
451 r = self._done.get()
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000452 if r[0] == -1:
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000453 # It's an exception.
454 raise r[2][0], r[2][1], r[2][2]
455 return r[2]
456
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000457 def _add_to_queue(self, priority, obj, dest, size):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000458 with self._ready_lock:
459 start_new_worker = not self._ready
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000460 self._queue.put((priority, self._next_index(), obj, dest, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000461 if start_new_worker:
462 self._add_worker()
463
464 def _add_worker(self):
465 """Add one worker thread if there isn't too many. Thread-safe."""
466 with self._workers_lock:
467 if len(self._workers) >= self.MAX_WORKERS:
468 return False
469 worker = threading.Thread(target=self._run)
470 self._workers.append(worker)
471 worker.daemon = True
472 worker.start()
473
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000474 def _step_done(self, result):
475 """Worker helper function"""
476 self._done.put(result)
477 self._queue.task_done()
478 if result[0] == -1:
479 self._exceptions.put(sys.exc_info())
480
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000481 def _run(self):
482 """Worker thread loop."""
483 while True:
484 try:
485 with self._ready_lock:
486 self._ready += 1
487 item = self._queue.get()
488 finally:
489 with self._ready_lock:
490 self._ready -= 1
491 if not item:
492 return
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000493 priority, index, obj, dest, size = item
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000494 try:
495 self._do_item(obj, dest)
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000496 if size and not valid_file(dest, size):
497 download_size = os.stat(dest).st_size
498 os.remove(dest)
499 raise IOError('File incorrect size after download of %s. Got %s and '
500 'expected %s' % (dest, download_size, size))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000501 except IOError:
502 # Retry a few times, lowering the priority.
503 if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
csharp@chromium.orgdf2968f2012-11-16 20:25:37 +0000504 self._add_to_queue(priority + 1, obj, dest, size)
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000505 self._queue.task_done()
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000506 continue
507 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000508 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000509 except:
510 # Transfers the exception back. It has maximum priority.
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000511 self._step_done((-1, 0, sys.exc_info()))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000512 else:
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000513 self._step_done((priority, index, obj))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000514
515 @staticmethod
maruel@chromium.orgfb155e92012-09-28 20:36:54 +0000516 def get_file_handler(file_or_url):
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000517 """Returns a object to retrieve objects from a remote."""
518 if re.match(r'^https?://.+$', file_or_url):
maruel@chromium.org986c2c42012-10-04 14:39:33 +0000519 # TODO(maruel): This is particularly hackish. It shouldn't rstrip('/') in
520 # the first place or try to append '/'.
521 if not file_or_url.endswith('='):
522 file_or_url = file_or_url.rstrip('/') + '/'
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000523 def download_file(item, dest):
524 # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
525 # easy.
526 source = file_or_url + item
527 logging.debug('download_file(%s, %s)', source, dest)
528 urllib.urlretrieve(source, dest)
529 return download_file
530
531 def copy_file(item, dest):
532 source = os.path.join(file_or_url, item)
533 logging.debug('copy_file(%s, %s)', source, dest)
534 shutil.copy(source, dest)
535 return copy_file
536
537
class CachePolicies(object):
  """Plain holder for the limits enforced on the cache."""
  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fill the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
552
553
class Cache(object):
  """Stateful LRU cache.

  Saves its state as json file.
  """
  STATE_FILE = 'state.json'

  def __init__(self, cache_dir, remote, policies):
    """
    Arguments:
    - cache_dir: Directory where to place the cache.
    - remote: Remote where to fetch items from.
    - policies: cache retention policies.
    """
    self.cache_dir = cache_dir
    self.remote = remote
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)
    # The tuple(file, size) are kept as an array in a LRU style. E.g.
    # self.state[0] is the oldest item.
    self.state = []
    # A lookup map to speed up searching; maps a file name to its index in
    # self.state.
    self._lookup = {}
    self._dirty = False

    # Items currently being fetched. Keep it local to reduce lock contention.
    self._pending_queue = set()

    # Profiling values.
    self._added = []
    self._removed = []
    self._free_disk = 0

    if not os.path.isdir(self.cache_dir):
      os.makedirs(self.cache_dir)
    if os.path.isfile(self.state_file):
      try:
        with open(self.state_file, 'r') as f:
          self.state = json.load(f)
      except (IOError, ValueError) as e:
        # Too bad. The file will be overwritten and the cache cleared.
        logging.error(
            'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
      if (not isinstance(self.state, list) or
          not all(
            isinstance(i, (list, tuple)) and len(i) == 2 for i in self.state)):
        # Discard.
        self.state = []
        self._dirty = True

    # Ensure that all files listed in the state still exist and add new ones.
    previous = set(filename for filename, _ in self.state)
    if len(previous) != len(self.state):
      # The same file is listed more than once.
      logging.warn('Cache state is corrupted')
      self._dirty = True
      self.state = []
    else:
      added = 0
      for filename in os.listdir(self.cache_dir):
        if filename == self.STATE_FILE:
          continue
        if filename in previous:
          previous.remove(filename)
          continue
        # An untracked file.
        self._dirty = True
        if not RE_IS_SHA1.match(filename):
          logging.warn('Removing unknown file %s from cache', filename)
          os.remove(self.path(filename))
        else:
          # Insert as the oldest file. It will be deleted eventually if not
          # accessed.
          self._add(filename, False)
          added += 1
      if added:
        logging.warn('Added back %d unknown files', added)
      # What is left in |previous| is listed in the state but missing on disk.
      self.state = [
        (filename, size) for filename, size in self.state
        if filename not in previous
      ]
      self._update_lookup()

    with Profiler('SetupTrimming'):
      self.trim()

  def __enter__(self):
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    with Profiler('CleanupTrimming'):
      self.trim()

    logging.info(
        '%4d (%7dkb) added', len(self._added), sum(self._added) / 1024)
    logging.info(
        '%4d (%7dkb) current',
        len(self.state),
        sum(i[1] for i in self.state) / 1024)
    logging.info(
        '%4d (%7dkb) removed', len(self._removed), sum(self._removed) / 1024)
    logging.info('%7dkb free', self._free_disk / 1024)

  def _remove_at(self, index):
    """Drops the entry at |index| from the state and deletes its file.

    Returns the size recorded for the entry. The indexes stored in
    self._lookup for the following entries become stale; the caller must call
    self._update_lookup() once it is done removing entries.
    """
    filename, size = self.state.pop(index)
    del self._lookup[filename]
    self._removed.append(size)
    # The state changed even if deleting the file fails below.
    self._dirty = True
    try:
      os.remove(self.path(filename))
    except OSError as e:
      logging.error('Error attempting to delete a file\n%s' % e)
    return size

  def remove_file_at_index(self, index):
    """Removes the file at the given index and refreshes the lookup map."""
    self._remove_at(index)
    self._update_lookup()

  def remove_lru_file(self):
    """Removes the least recently used file."""
    self.remove_file_at_index(0)

  def trim(self):
    """Trims anything we don't know, make sure enough free space exists."""
    policies = self.policies
    # Ensure maximum cache size. The total is maintained incrementally instead
    # of being summed again after each removal.
    if policies.max_cache_size and self.state:
      total_size = sum(size for _, size in self.state)
      while self.state and total_size > policies.max_cache_size:
        total_size -= self._remove_at(0)

    # Ensure maximum number of items in the cache.
    if policies.max_items:
      while len(self.state) > policies.max_items:
        self._remove_at(0)

    # Ensure enough free space.
    self._free_disk = get_free_space(self.cache_dir)
    while (
        policies.min_free_space and
        self.state and
        self._free_disk < policies.min_free_space):
      self._remove_at(0)
      self._free_disk = get_free_space(self.cache_dir)

    # Entries were removed from the front, which shifts the index of all the
    # remaining ones. Rebuild the map once so retrieve() finds the right entry.
    self._update_lookup()
    self.save()

  def retrieve(self, priority, item, size):
    """Retrieves a file from the remote, if not already cached, and adds it to
    the cache.

    If the file is in the cache, verify that the file is valid (i.e. it is
    the correct size), retrieving it again if it isn't.
    """
    assert not '/' in item
    path = self.path(item)
    index = self._lookup.get(item)

    if index is not None:
      if not valid_file(path, size):
        self.remove_file_at_index(index)
        index = None
      else:
        assert index < len(self.state)
        # Was already in cache. Update its LRU value by putting it at the end.
        self.state.append(self.state.pop(index))
        self._dirty = True
        self._update_lookup()

    if index is None:
      if item in self._pending_queue:
        # Already pending. The same object could be referenced multiple times.
        return
      self.remote.add_item(priority, item, path, size)
      self._pending_queue.add(item)

  def add(self, filepath, obj):
    """Forcibly adds a file to the cache."""
    if obj not in self._lookup:
      link_file(self.path(obj), filepath, HARDLINK)
      self._add(obj, True)

  def path(self, item):
    """Returns the path to one item."""
    return os.path.join(self.cache_dir, item)

  def save(self):
    """Saves the LRU ordering."""
    # Close the file explicitly so the content is flushed to disk right away.
    with open(self.state_file, 'wb') as f:
      json.dump(self.state, f, separators=(',',':'))

  def wait_for(self, items):
    """Starts a loop that waits for at least one of |items| to be retrieved.

    Returns the first item retrieved, or None if nothing is pending anymore
    and none of |items| was retrieved.
    """
    # Flush items already present.
    for item in items:
      if item in self._lookup:
        return item

    assert all(i in self._pending_queue for i in items), (
        items, self._pending_queue)
    # Note that:
    #   len(self._pending_queue) ==
    #   ( len(self.remote._workers) - self.remote._ready +
    #     len(self._remote._queue) + len(self._remote.done))
    # There is no lock-free way to verify that.
    while self._pending_queue:
      item = self.remote.get_result()
      self._pending_queue.remove(item)
      self._add(item, True)
      if item in items:
        return item

  def _add(self, item, at_end):
    """Adds an item in the internal state.

    If |at_end| is False, self._lookup becomes inconsistent and
    self._update_lookup() must be called.
    """
    size = os.stat(self.path(item)).st_size
    self._added.append(size)
    if at_end:
      self.state.append((item, size))
      self._lookup[item] = len(self.state) - 1
    else:
      self.state.insert(0, (item, size))
    self._dirty = True

  def _update_lookup(self):
    """Rebuilds the file name to index map from self.state."""
    self._lookup = dict(
        (filename, index) for index, (filename, _) in enumerate(self.state))
780
781
782
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""
  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)' % obj_hash)
    self.obj_hash = obj_hash

    # Raw data, as returned by load_isolated().
    self.data = {}
    # A IsolatedFile instance, one per object in self.data['includes'].
    self.children = []

    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing a
    # .isolated file in the hash table, is important, as the later ones are not
    # processed until the firsts are retrieved and read.
    self.can_fetch = False
    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| as a .isolated file and stores the result.

    One child IsolatedFile is created per entry of 'includes'. Must only be
    called once per instance.
    """
    logging.debug('IsolatedFile.load(%s)' % self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    included_hashes = self.data.get('includes', [])
    self.children = [IsolatedFile(sha1) for sha1 in included_hashes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively request files.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)' % self.obj_hash)
    for filepath, properties in self.data.get('files', {}).iteritems():
      # Root isolated has priority on the files being mapped. In particular,
      # overriden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'sha-1' not in properties:
        continue
      # Preemptively request files.
      logging.debug('fetching %s' % filepath)
      cache.retrieve(Remote.MED, properties['sha-1'], properties['size'])
    self.files_fetched = True
837
838
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    # Command line to run, taken from the first loaded node that defines one.
    self.command = []
    # Maps a relative file path to its properties dictionary.
    self.files = {}
    # None until load() resolves them from the tree of .isolated files.
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, a IsolatedFile instance.
    self.root = None
    logging.debug('Settings')

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important so
    that a file in an included .isolated file that is overridden by an embedding
    .isolated file is not fetched needlessly. The includes are fetched in one
    pass and the files are fetched as soon as all the ones on the left-side
    of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for both
    deep and wide trees. A deep one is a long link of .isolated files referenced
    one at a time by one item in 'includes'. A wide one has a large number of
    'includes' in a single .isolated file. 'left' is defined as an included
    .isolated file earlier in the 'includes' list. So the order of the elements
    in 'includes' is important.

    Arguments:
      cache: object used to request items (retrieve()), block until one of the
          pending items is available (wait_for()) and locate it on disk
          (path()).
      root_isolated_hash: hash of the root .isolated file.

    Raises:
      ConfigError if an included .isolated file has a hash present in the list
      of already retrieved hashes.

    On return self.command, self.files, self.read_only and self.relative_cwd
    are filled in; self.read_only defaults to False and self.relative_cwd to ''.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps the list of retrieved items to refuse recursive includes.
    # NOTE(review): only the root hash is ever recorded here, so the check
    # below only catches an include that points back at the root. Confirm
    # whether included hashes were meant to be recorded too.
    retrieved = [root_isolated_hash]

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node visited that defines a property wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              # Only one child gets unlocked per pass over this node.
              break
            # Automatically mark the first one as fetcheable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the file deterministically instead of relying on the garbage
      # collector to release the handle.
      with open(cache.path(item_hash), 'r') as isolated_file:
        item.load(isolated_file.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    # Sanity check: every node of the tree must have had its files requested.
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
921
922
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  Arguments:
    isolated_hash: either the SHA-1 (40 hex characters) of the .isolated file
        to fetch, or the path to a local .isolated file. A local file is
        hashed and added to the cache first.
    cache_dir: directory handed to Cache; the temporary directory is created
        through make_temp_dir() with it as argument.
    remote: value handed to Remote() to retrieve the items.
    policies: cache policies handed to Cache.

  Returns:
    1 if the .isolated file defines no command, otherwise the value returned
    by subprocess.call() for the command.

  Raises:
    OSError if the command cannot be started; it is re-raised after a message
    is written to stderr.

  The temporary directory is always removed before returning.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # Read in binary mode so the hash is computed on the exact bytes on
          # disk on every platform, and close the file right away.
          with open(isolated_hash, 'rb') as isolated_file:
            h = hashlib.sha1(isolated_file.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest'):
        logging.debug('Creating directories')
        # Creates the tree of directories to create.
        directories = set(os.path.dirname(f) for f in settings.files)
        for item in list(directories):
          # Adds every parent directory too.
          while item:
            directories.add(item)
            item = os.path.dirname(item)
        # Sorted order guarantees a parent is created before its children.
        for d in sorted(directories):
          if d:
            os.mkdir(os.path.join(outdir, d))

        # Creates the links if necessary.
        for filepath, properties in settings.files.items():
          if 'link' not in properties:
            continue
          outfile = os.path.join(outdir, filepath)
          # symlink doesn't exist on Windows. So the 'link' property should
          # never be specified for windows .isolated file.
          os.symlink(properties['link'], outfile)  # pylint: disable=E1101
          if 'mode' in properties:
            # It's not set on Windows.
            lchmod = getattr(os, 'lchmod', None)
            if lchmod:
              lchmod(outfile, properties['mode'])

        # Remaining files to be processed.
        # Note that files could still not be downloaded yet here.
        # Maps a sha-1 to the list of (filepath, properties) mapped from it.
        remaining = dict()
        for filepath, props in settings.files.items():
          if 'sha-1' in props:
            remaining.setdefault(props['sha-1'], []).append((filepath, props))

        # Do bookkeeping while files are being downloaded in the background.
        cwd = os.path.join(outdir, settings.relative_cwd)
        if not os.path.isdir(cwd):
          os.makedirs(cwd)
        cmd = settings.command[:]
        # Ensure paths are correctly separated on windows.
        cmd[0] = cmd[0].replace('/', os.path.sep)
        cmd = fix_python_path(cmd)

        # Now block on the remaining files to be downloaded and mapped.
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'mode' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['mode'])

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s' % (cmd, cwd))
      try:
        with Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1009
1010
def main():
  """Parses the command line and runs the isolated test.

  Exactly one of --isolated (or its deprecated alias --manifest) and --hash
  must be given, --remote is required and no positional argument is accepted;
  otherwise the option parser reports the error and exits.

  Returns:
    The value returned by run_tha_test(), or 1 if it raised ConfigError or
    MappingError, in which case the error is written to stderr.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): --no-run is accepted but its value is not consulted anywhere
  # in this function; confirm whether it is still meant to have an effect.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  # Registered before the group is attached to the parser so that all the
  # 'Data source' options are declared in one place.
  group.add_option(
      '-r', '--remote', metavar='URL', help='Remote where to get the items')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Cache management')
  group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(group)

  options, args = parser.parse_args()
  # No -v logs errors only, -v logs info, -vv or more logs debug.
  level = [logging.ERROR, logging.INFO, logging.DEBUG][min(2, options.verbose)]
  logging.basicConfig(
      level=level,
      format='%(levelname)5s %(module)15s(%(lineno)3d): %(message)s')

  if bool(options.isolated) == bool(options.hash):
    parser.error('One and only one of --isolated or --hash is required.')
  if not options.remote:
    parser.error('--remote is required.')
  if args:
    parser.error('Unsupported args %s' % ' '.join(args))

  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)
  try:
    return run_tha_test(
        options.isolated or options.hash,
        os.path.abspath(options.cache),
        options.remote,
        policies)
  except (ConfigError, MappingError) as e:
    sys.stderr.write(str(e) + '\n')
    return 1
1085
1086
# Script entry point: the value returned by main() becomes the exit code.
if __name__ == '__main__':
  exit_code = main()
  sys.exit(exit_code)