blob: bb0e66902828a900fbd0a64e1acd878b5b0ab687 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
maruel@chromium.org0cd0b182012-10-22 13:34:15 +00006"""Reads a .isolated, creates a tree of hardlinks and runs the test.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
8Keeps a local cache.
9"""
10
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000011import ctypes
12import hashlib
csharp@chromium.orga110d792013-01-07 16:16:16 +000013import httplib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000014import json
15import logging
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000016import logging.handlers
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000017import optparse
18import os
19import Queue
20import re
21import shutil
22import stat
23import subprocess
24import sys
25import tempfile
26import threading
27import time
28import urllib
csharp@chromium.orga92403f2012-11-20 15:13:59 +000029import urllib2
30import zlib
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000031
32
maruel@chromium.org6b365dc2012-10-18 19:17:56 +000033# Types of action accepted by link_file().
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000034HARDLINK, SYMLINK, COPY = range(1, 4)
35
36RE_IS_SHA1 = re.compile(r'^[a-fA-F0-9]{40}$')
37
csharp@chromium.org8dc52542012-11-08 20:29:55 +000038# The file size to be used when we don't know the correct file size,
39# generally used for .isolated files.
40UNKNOWN_FILE_SIZE = None
41
csharp@chromium.orga92403f2012-11-20 15:13:59 +000042# The size of each chunk to read when downloading and unzipping files.
43ZIPPED_FILE_CHUNK = 16 * 1024
44
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000045# The name of the log file to use.
46RUN_ISOLATED_LOG_FILE = 'run_isolated.log'
47
csharp@chromium.orge217f302012-11-22 16:51:53 +000048# The base directory containing this file.
49BASE_DIR = os.path.dirname(os.path.abspath(__file__))
50
51# The name of the log to use for the run_test_cases.py command
52RUN_TEST_CASES_LOG = os.path.join(BASE_DIR, 'run_test_cases.log')
53
csharp@chromium.org9c59ff12012-12-12 02:32:29 +000054# The delay (in seconds) to wait between logging statements when retrieving
55# the required files. This is intended to let the user (or buildbot) know that
56# the program is still running.
57DELAY_BETWEEN_UPDATES_IN_SECS = 30
58
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000059
class ConfigError(ValueError):
  """Signals that a .isolated file could not be loaded or validated."""
63
64
class MappingError(OSError):
  """Signals that the tree of files could not be recreated."""
68
69
csharp@chromium.orga92403f2012-11-20 15:13:59 +000070class DownloadFileOpener(urllib.FancyURLopener):
71 """This class is needed to get urlretrive to raise an exception on
72 404 errors, instead of still writing to the file with the error code.
73 """
74 def http_error_default(self, url, fp, errcode, errmsg, headers):
75 raise urllib2.HTTPError(url, errcode, errmsg, headers, fp)
76
77
def get_flavor():
  """Returns the flavor name matching sys.platform, defaulting to 'linux'.

  Copied from gyp/pylib/gyp/common.py.
  """
  platform = sys.platform
  if platform in ('cygwin', 'win32'):
    return 'win'
  if platform == 'darwin':
    return 'mac'
  if platform == 'sunos5':
    return 'solaris'
  if platform in ('freebsd7', 'freebsd8'):
    return 'freebsd'
  # Any other platform is treated as linux.
  return 'linux'
89
90
def os_link(source, link_name):
  """Creates the hardlink |link_name| pointing at |source|.

  Uses os.link() everywhere except on Windows, where CreateHardLinkW is called
  through ctypes. Raises OSError on Windows if CreateHardLinkW reports failure.
  """
  if sys.platform != 'win32':
    os.link(source, link_name)
    return
  create_hard_link = ctypes.windll.kernel32.CreateHardLinkW
  succeeded = create_hard_link(unicode(link_name), unicode(source), 0)
  if not succeeded:
    raise OSError()
99
100
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and grants read access to everyone.

  The read bits for user, group and others are added on top of whatever mode
  the copy ended up with.
  """
  shutil.copy(infile, outfile)
  everyone_can_read = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = os.stat(outfile).st_mode
  os.chmod(outfile, copied_mode | everyone_can_read)
107
108
def link_file(outfile, infile, action):
  """Maps |infile| to |outfile| using the method selected by |action|.

  |action| must be one of HARDLINK, SYMLINK or COPY, otherwise ValueError is
  raised. MappingError is raised if |infile| is not a file or if |outfile|
  already exists as a file. A failed hardlink falls back to a readable copy.
  """
  logging.debug('Mapping %s to %s' % (infile, outfile))
  if action not in (HARDLINK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not os.path.isfile(infile):
    raise MappingError('%s is missing' % infile)
  if os.path.isfile(outfile):
    in_size = os.stat(infile).st_size
    out_size = os.stat(outfile).st_size
    raise MappingError(
        '%s already exist; insize:%d; outsize:%d' %
        (outfile, in_size, out_size))

  if action == COPY:
    readable_copy(outfile, infile)
    return

  if action == SYMLINK and sys.platform != 'win32':
    # On windows, symlink are converted to hardlink and fails over to copy.
    os.symlink(infile, outfile)  # pylint: disable=E1101
    return

  try:
    os_link(infile, outfile)
  except OSError:
    # Probably a different file system.
    logging.warn(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
135
136
def _set_write_bit(path, read_only):
  """Sets or clears the write bit on a file or directory.

  Arguments:
  - path: the entry to modify. Its mode is read with lstat(), so a symlink is
          inspected itself rather than its target.
  - read_only: if True, only the owner's read and execute bits are kept
               (mode & 0500); if False, the owner's write bit is added.

  When os.lchmod() is unavailable, symlinks are skipped so that the link
  target is never modified.
  """
  original_mode = os.lstat(path).st_mode
  if read_only:
    mode = original_mode & 0o500
  else:
    mode = original_mode | 0o200
  if hasattr(os, 'lchmod'):
    os.lchmod(path, mode)  # pylint: disable=E1101
    return

  # Test the unmodified mode: masking with 0500 strips the file type bits, so
  # checking the new mode would never detect a symlink when read_only is True
  # and os.chmod() would then change the link target instead.
  if stat.S_ISLNK(original_mode):
    # Skip symlink without lchmod() support.
    logging.debug('Can\'t change +w bit on symlink %s' % path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  os.chmod(path, mode)
154
155
def make_writable(root, read_only):
  """Toggles the writable bit on every entry found below |root|.

  |root| must be an absolute path. For each directory visited, its files are
  processed before its sub-directories.
  """
  assert os.path.isabs(root), root
  for parent, subdirs, files in os.walk(root, topdown=True):
    for entry in files + subdirs:
      _set_write_bit(os.path.join(parent, entry), read_only)
165
166
def rmtree(root):
  """Deletes the tree at |root|, retrying automatically on Windows.

  The tree is made writable first. On Windows, up to three attempts are made,
  sleeping 2, 4 then 6 seconds after each failed attempt.
  """
  make_writable(root, False)
  if sys.platform != 'win32':
    shutil.rmtree(root)
    return

  for attempt in range(3):
    try:
      shutil.rmtree(root)
    except WindowsError:  # pylint: disable=E0602
      delay = 2 * (attempt + 1)
      print >> sys.stderr, (
          'The test has subprocess outliving it. Sleep %d seconds.' % delay)
      time.sleep(delay)
    else:
      # Deletion succeeded, no need to retry.
      break
182
183
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  This is required to enable the use of hardlinks. Both paths must be
  absolute. The result is based on the st_dev value reported by os.stat().
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    drive_pattern = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_pattern, path1), path1
    assert re.match(drive_pattern, path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  device1 = os.stat(path1).st_dev
  device2 = os.stat(path2).st_dev
  return device1 == device2
200
201
def get_free_space(path):
  """Returns the number of free bytes on the filesystem holding |path|.

  Uses os.statvfs() (free blocks times fragment size) everywhere except on
  Windows, where GetDiskFreeSpaceExW is queried through ctypes.
  """
  if sys.platform != 'win32':
    # For OSes other than Windows.
    fs_stats = os.statvfs(path)  # pylint: disable=E1101
    return fs_stats.f_bfree * fs_stats.f_frsize

  free_bytes = ctypes.c_ulonglong(0)
  ctypes.windll.kernel32.GetDiskFreeSpaceExW(
      ctypes.c_wchar_p(path), None, None, ctypes.pointer(free_bytes))
  return free_bytes.value
212
213
def make_temp_dir(prefix, root_dir):
  """Creates a temporary directory on the same file system as |root_dir|.

  The default temporary location is used when it shares a filesystem with
  |root_dir|; otherwise the directory is created next to |root_dir|.
  """
  if is_same_filesystem(root_dir, tempfile.gettempdir()):
    parent_dir = None
  else:
    parent_dir = os.path.dirname(root_dir)
  return tempfile.mkdtemp(prefix=prefix, dir=parent_dir)
220
221
def load_isolated(content):
  """Parses the content of a .isolated file and verifies that it is valid.

  Arguments:
  - content: the JSON text of the .isolated file.

  Returns the decoded dict once every key and value has been checked.

  Raises ConfigError if |content| is not valid JSON, does not decode to a dict,
  contains an unknown key, contains a value of an unexpected type or declares
  an 'os' different from get_flavor().
  """
  try:
    data = json.loads(content)
  except ValueError:
    raise ConfigError('Failed to parse: %s...' % content[:100])

  if not isinstance(data, dict):
    raise ConfigError('Expected dict, got %r' % data)

  for key, value in data.iteritems():
    if key == 'command':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty command')
      for subvalue in value:
        if not isinstance(subvalue, basestring):
          raise ConfigError('Expected string, got %r' % subvalue)

    elif key == 'files':
      if not isinstance(value, dict):
        raise ConfigError('Expected dict, got %r' % value)
      for subkey, subvalue in value.iteritems():
        if not isinstance(subkey, basestring):
          raise ConfigError('Expected string, got %r' % subkey)
        if not isinstance(subvalue, dict):
          raise ConfigError('Expected dict, got %r' % subvalue)
        for subsubkey, subsubvalue in subvalue.iteritems():
          if subsubkey == 'l':
            if not isinstance(subsubvalue, basestring):
              raise ConfigError('Expected string, got %r' % subsubvalue)
          elif subsubkey == 'm':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          elif subsubkey == 'h':
            # Check the type first: matching a regexp against a non-string
            # raises TypeError instead of the expected ConfigError.
            if (not isinstance(subsubvalue, basestring) or
                not RE_IS_SHA1.match(subsubvalue)):
              raise ConfigError('Expected sha-1, got %r' % subsubvalue)
          elif subsubkey == 's':
            if not isinstance(subsubvalue, int):
              raise ConfigError('Expected int, got %r' % subsubvalue)
          else:
            raise ConfigError('Unknown subsubkey %s' % subsubkey)
        # An entry is either a content hash or a link, never both.
        if 'h' in subvalue and 'l' in subvalue:
          raise ConfigError(
              'Did not expect both \'h\' (sha-1) and \'l\' (link), got: %r' %
              subvalue)

    elif key == 'includes':
      if not isinstance(value, list):
        raise ConfigError('Expected list, got %r' % value)
      if not value:
        raise ConfigError('Expected non-empty includes list')
      for subvalue in value:
        # Same type guard as for 'h' above.
        if (not isinstance(subvalue, basestring) or
            not RE_IS_SHA1.match(subvalue)):
          raise ConfigError('Expected sha-1, got %r' % subvalue)

    elif key == 'read_only':
      if not isinstance(value, bool):
        raise ConfigError('Expected bool, got %r' % value)

    elif key == 'relative_cwd':
      if not isinstance(value, basestring):
        raise ConfigError('Expected string, got %r' % value)

    elif key == 'os':
      if value != get_flavor():
        raise ConfigError(
            'Expected \'os\' to be \'%s\' but got \'%s\'' %
            (get_flavor(), value))

    else:
      raise ConfigError('Unknown key %s' % key)

  return data
299
300
def fix_python_path(cmd):
  """Returns a copy of |cmd| that invokes the current python executable.

  A leading 'python' is replaced by sys.executable; a leading script ending in
  '.py' gets sys.executable inserted in front of it. Any other command is
  returned as a plain copy. |cmd| itself is never modified.
  """
  program = cmd[0]
  if program == 'python':
    return [sys.executable] + cmd[1:]
  if program.endswith('.py'):
    return [sys.executable] + cmd
  return cmd[:]
309
310
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000311class ThreadPool(object):
312 """Implements a multithreaded worker pool oriented for mapping jobs with
313 thread-local result storage.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000314
315 Arguments:
316 - initial_threads: Number of threads to start immediately. Can be 0 if it is
317 uncertain that threads will be needed.
318 - max_threads: Maximum number of threads that will be started when all the
319 threads are busy working. Often the number of CPU cores.
320 - queue_size: Maximum number of tasks to buffer in the queue. 0 for unlimited
321 queue. A non-zero value may make add_task() blocking.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000322 """
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000323 QUEUE_CLASS = Queue.PriorityQueue
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000324
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000325 def __init__(self, initial_threads, max_threads, queue_size):
326 logging.debug(
327 'ThreadPool(%d, %d, %d)', initial_threads, max_threads, queue_size)
328 assert initial_threads <= max_threads
329 # Update this check once 256 cores CPU are common.
330 assert max_threads <= 256
331
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000332 self.tasks = self.QUEUE_CLASS(queue_size)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000333 self._max_threads = max_threads
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000334
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000335 # Mutables.
336 self._num_of_added_tasks_lock = threading.Lock()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000337 self._num_of_added_tasks = 0
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000338 self._outputs_exceptions_cond = threading.Condition()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000339 self._outputs = []
340 self._exceptions = []
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000341 # Number of threads in wait state.
342 self._ready_lock = threading.Lock()
343 self._ready = 0
344 self._workers_lock = threading.Lock()
345 self._workers = []
346 for _ in range(initial_threads):
347 self._add_worker()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000348
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000349 def _add_worker(self):
350 """Adds one worker thread if there isn't too many. Thread-safe."""
351 # Better to take the lock two times than hold it for too long.
352 with self._workers_lock:
353 if len(self._workers) >= self._max_threads:
354 return False
355 worker = threading.Thread(target=self._run)
356 with self._workers_lock:
357 if len(self._workers) >= self._max_threads:
358 return False
359 self._workers.append(worker)
360 worker.daemon = True
361 worker.start()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000362
maruel@chromium.org831958f2013-01-22 15:01:46 +0000363 def add_task(self, priority, func, *args, **kwargs):
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000364 """Adds a task, a function to be executed by a worker.
365
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000366 |priority| can adjust the priority of the task versus others. Lower priority
maruel@chromium.org831958f2013-01-22 15:01:46 +0000367 takes precedence.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000368
369 Returns the index of the item added, e.g. the total number of enqueued items
370 up to now.
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000371 """
maruel@chromium.org831958f2013-01-22 15:01:46 +0000372 assert isinstance(priority, int)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000373 assert callable(func)
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000374 with self._ready_lock:
375 start_new_worker = not self._ready
376 with self._num_of_added_tasks_lock:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000377 self._num_of_added_tasks += 1
378 index = self._num_of_added_tasks
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000379 self.tasks.put((priority, index, func, args, kwargs))
380 if start_new_worker:
381 self._add_worker()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000382 return index
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000383
384 def _run(self):
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000385 """Worker thread loop. Runs until a None task is queued."""
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000386 while True:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000387 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000388 with self._ready_lock:
389 self._ready += 1
390 task = self.tasks.get()
391 finally:
392 with self._ready_lock:
393 self._ready -= 1
394 try:
395 if task is None:
396 # We're done.
397 return
398 _priority, _index, func, args, kwargs = task
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000399 out = func(*args, **kwargs)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000400 if out is not None:
401 self._outputs_exceptions_cond.acquire()
402 try:
403 self._outputs.append(out)
404 self._outputs_exceptions_cond.notifyAll()
405 finally:
406 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000407 except Exception as e:
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000408 logging.warning('Caught exception: %s', e)
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000409 exc_info = sys.exc_info()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000410 self._outputs_exceptions_cond.acquire()
411 try:
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000412 self._exceptions.append(exc_info)
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000413 self._outputs_exceptions_cond.notifyAll()
414 finally:
415 self._outputs_exceptions_cond.release()
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000416 finally:
417 self.tasks.task_done()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000418
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000419 def join(self):
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000420 """Extracts all the results from each threads unordered.
421
422 Call repeatedly to extract all the exceptions if desired.
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000423
424 Note: will wait for all work items to be done before returning an exception.
425 To get an exception early, use get_one_result().
maruel@chromium.org5a1446a2013-01-17 15:13:27 +0000426 """
427 # TODO(maruel): Stop waiting as soon as an exception is caught.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000428 self.tasks.join()
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000429 self._outputs_exceptions_cond.acquire()
430 try:
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000431 if self._exceptions:
432 e = self._exceptions.pop(0)
433 raise e[0], e[1], e[2]
maruel@chromium.org6b0c9ec2013-01-18 00:34:31 +0000434 out = self._outputs
435 self._outputs = []
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000436 finally:
437 self._outputs_exceptions_cond.release()
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000438 return out
439
maruel@chromium.org13eca0b2013-01-22 16:42:21 +0000440 def get_one_result(self):
441 """Returns the next item that was generated or raises an exception if one
442 occured.
443
444 Warning: this function will hang if there is no work item left. Use join
445 instead.
446 """
447 self._outputs_exceptions_cond.acquire()
448 try:
449 while True:
450 if self._exceptions:
451 e = self._exceptions.pop(0)
452 raise e[0], e[1], e[2]
453 if self._outputs:
454 return self._outputs.pop(0)
455 self._outputs_exceptions_cond.wait()
456 finally:
457 self._outputs_exceptions_cond.release()
458
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000459 def close(self):
460 """Closes all the threads."""
461 for _ in range(len(self._workers)):
462 # Enqueueing None causes the worker to stop.
maruel@chromium.orgeb281652012-11-08 21:10:23 +0000463 self.tasks.put(None)
maruel@chromium.org8df128b2012-11-08 19:05:04 +0000464 for t in self._workers:
465 t.join()
466
467 def __enter__(self):
468 """Enables 'with' statement."""
469 return self
470
471 def __exit__(self, exc_type, exc_value, traceback):
472 """Enables 'with' statement."""
473 self.close()
474
475
def valid_file(filepath, size):
  """Tells whether the file at |filepath| appears valid.

  Currently only the size is checked: the file is valid when |size| is
  UNKNOWN_FILE_SIZE or equals the size of the file on disk. A mismatch is
  logged as a warning.
  """
  if size == UNKNOWN_FILE_SIZE:
    return True
  actual_size = os.stat(filepath).st_size
  if size == actual_size:
    return True
  logging.warning(
      'Found invalid item %s; %d != %d',
      os.path.basename(filepath), actual_size, size)
  return False
csharp@chromium.org8dc52542012-11-08 20:29:55 +0000488
489
class Profiler(object):
  """Context manager that logs how long the enclosed section took to run."""

  def __init__(self, name):
    # start_time stays None until the context is entered.
    self.start_time = None
    self.name = name

  def __enter__(self):
    """Records the time at which the section starts."""
    self.start_time = time.time()
    return self

  def __exit__(self, _exc_type, _exec_value, _traceback):
    """Logs the time elapsed since __enter__() at info level."""
    elapsed = time.time() - self.start_time
    logging.info(
        'Profiling: Section %s took %3.3f seconds', self.name, elapsed)
503
504
class Remote(object):
  """Priority based worker queue to fetch or upload files from a
  content-address server. Any function may be given as the fetcher/upload,
  as long as it takes two inputs (the item contents, and their relative
  destination).

  Supports local file system, CIFS or http remotes.

  When the priority of items is equal, works in strict FIFO mode.
  """
  # Initial and maximum number of worker threads.
  INITIAL_WORKERS = 2
  MAX_WORKERS = 16
  # Priorities.
  LOW, MED, HIGH = (1<<8, 2<<8, 3<<8)
  # The low bits of a priority are reserved to count the retries.
  INTERNAL_PRIORITY_BITS = (1<<8) - 1
  RETRIES = 5

  def __init__(self, destination_root):
    # Function to fetch a remote object or upload to a remote location.
    self._do_item = self.get_file_handler(destination_root)
    # Contains tuple(priority, obj).
    self._done = Queue.PriorityQueue()
    self._pool = ThreadPool(self.INITIAL_WORKERS, self.MAX_WORKERS, 0)

  def join(self):
    """Blocks until the queue is empty and returns the pool's results."""
    return self._pool.join()

  def add_item(self, priority, obj, dest, size):
    """Retrieves an object from the remote data store.

    The smaller |priority| gets fetched first. |priority| must not use the bits
    reserved for the retry count.

    Thread-safe.
    """
    assert (priority & self.INTERNAL_PRIORITY_BITS) == 0
    return self._add_item(priority, obj, dest, size)

  def _add_item(self, priority, obj, dest, size):
    """Enqueues the item without checking the reserved priority bits."""
    assert isinstance(obj, basestring), obj
    assert isinstance(dest, basestring), dest
    assert size is None or isinstance(size, int), size
    return self._pool.add_task(
        priority, self._task_executer, priority, obj, dest, size)

  def get_one_result(self):
    """Returns the next result available from the pool."""
    return self._pool.get_one_result()

  def _task_executer(self, priority, obj, dest, size):
    """Wraps self._do_item to trap and retry on IOError exceptions.

    Returns |obj| on success and None when the item was queued for a retry.
    Re-raises the IOError once the retries are exhausted.
    """
    try:
      self._do_item(obj, dest)
      if size and not valid_file(dest, size):
        download_size = os.stat(dest).st_size
        os.remove(dest)
        raise IOError('File incorrect size after download of %s. Got %s and '
                      'expected %s' % (obj, download_size, size))
      # TODO(maruel): Technically, we'd want to have an output queue to be a
      # PriorityQueue.
      return obj
    except IOError as e:
      logging.debug('Caught IOError: %s', e)
      # Retry a few times, lowering the priority.
      if (priority & self.INTERNAL_PRIORITY_BITS) < self.RETRIES:
        self._add_item(priority + 1, obj, dest, size)
        return
      raise

  def get_file_handler(self, file_or_url):  # pylint: disable=R0201
    """Returns a function(item, dest) that retrieves objects from a remote.

    A download function is returned when |file_or_url| is an http(s) url,
    otherwise a function copying from the |file_or_url| directory is returned.
    """
    if re.match(r'^https?://.+$', file_or_url):
      def download_file(item, dest):
        """Downloads the zlib-compressed |item| and writes it, decompressed,
        to |dest|. Every failure is reported as an IOError.
        """
        # TODO(maruel): Reuse HTTP connections. The stdlib doesn't make this
        # easy.
        zipped_source = file_or_url + item
        logging.debug('download_file(%s)', zipped_source)
        connection = None
        # Defined before the try block so the error handlers can always use
        # them.
        size = 0
        chunk = ''
        try:
          connection = urllib2.urlopen(zipped_source)
          decompressor = zlib.decompressobj()
          with open(dest, 'wb') as f:
            while True:
              chunk = connection.read(ZIPPED_FILE_CHUNK)
              if not chunk:
                break
              size += len(chunk)
              f.write(decompressor.decompress(chunk))
            # flush() returns whatever uncompressed output is still pending;
            # it belongs to the file, so write it instead of dropping it.
            f.write(decompressor.flush())
        except IOError:
          logging.error('Encountered an exception with (%s, %s)' % (item, dest))
          raise
        except httplib.HTTPException as e:
          raise IOError('Encountered an HTTPException.\n%s' % e)
        except zlib.error as e:
          # Log the first bytes to see if it's uncompressed data.
          logging.warning('%r', chunk[:512])
          raise IOError(
              'Problem unzipping data for item %s. Got %d bytes.\n%s' %
              (item, size, e))
        finally:
          # Do not leak the HTTP connection, whatever the outcome.
          if connection is not None:
            connection.close()

      return download_file

    def copy_file(item, dest):
      """Copies |item| from the |file_or_url| directory to |dest|."""
      source = os.path.join(file_or_url, item)
      if source == dest:
        logging.info('Source and destination are the same, no action required')
        return
      logging.debug('copy_file(%s, %s)', source, dest)
      shutil.copy(source, dest)
    return copy_file
618
619
class CachePolicies(object):
  """Holds the retention limits of a cache."""

  def __init__(self, max_cache_size, min_free_space, max_items):
    """
    Arguments:
    - max_cache_size: Trim if the cache gets larger than this value. If 0, the
                      cache is effectively a leak.
    - min_free_space: Trim if disk free space becomes lower than this value. If
                      0, it unconditionally fills the disk.
    - max_items: Maximum number of items to keep in the cache. If 0, do not
                 enforce a limit.
    """
    self.max_items = max_items
    self.min_free_space = min_free_space
    self.max_cache_size = max_cache_size
634
635
class NoCache(object):
  """This class is intended to be usable everywhere the Cache class is.

  Instead of downloading to a cache, all files are downloaded to the target
  directory and then moved to where they are needed.
  """

  def __init__(self, target_directory, remote):
    self.remote = remote
    self.target_directory = target_directory

  def retrieve(self, priority, item, size):
    """Fetches the requested item and waits for one result from the remote."""
    destination = self.path(item)
    self.remote.add_item(priority, item, destination, size)
    self.remote.get_one_result()

  def wait_for(self, items):
    """Downloads the first item of the given dict if it is missing.

    Returns the name of that item.
    """
    item = next(items.iterkeys())
    destination = self.path(item)
    if os.path.exists(destination):
      return item

    self.remote.add_item(Remote.MED, item, destination, UNKNOWN_FILE_SIZE)
    downloaded = self.remote.get_one_result()
    assert downloaded == item
    return item

  def path(self, item):
    """Returns the location of |item| inside the target directory."""
    return os.path.join(self.target_directory, item)
664
665
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000666class Cache(object):
667 """Stateful LRU cache.
668
669 Saves its state as json file.
670 """
671 STATE_FILE = 'state.json'
672
673 def __init__(self, cache_dir, remote, policies):
674 """
675 Arguments:
676 - cache_dir: Directory where to place the cache.
677 - remote: Remote where to fetch items from.
678 - policies: cache retention policies.
679 """
680 self.cache_dir = cache_dir
681 self.remote = remote
682 self.policies = policies
683 self.state_file = os.path.join(cache_dir, self.STATE_FILE)
684 # The tuple(file, size) are kept as an array in a LRU style. E.g.
685 # self.state[0] is the oldest item.
686 self.state = []
maruel@chromium.org770993b2012-12-11 17:16:48 +0000687 self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000688 # A lookup map to speed up searching.
689 self._lookup = {}
maruel@chromium.org770993b2012-12-11 17:16:48 +0000690 self._lookup_is_stale = True
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000691
692 # Items currently being fetched. Keep it local to reduce lock contention.
693 self._pending_queue = set()
694
695 # Profiling values.
696 self._added = []
697 self._removed = []
698 self._free_disk = 0
699
maruel@chromium.org770993b2012-12-11 17:16:48 +0000700 with Profiler('Setup'):
701 if not os.path.isdir(self.cache_dir):
702 os.makedirs(self.cache_dir)
703 if os.path.isfile(self.state_file):
704 try:
705 self.state = json.load(open(self.state_file, 'r'))
706 except (IOError, ValueError), e:
707 # Too bad. The file will be overwritten and the cache cleared.
708 logging.error(
709 'Broken state file %s, ignoring.\n%s' % (self.STATE_FILE, e))
710 self._state_need_to_be_saved = True
711 if (not isinstance(self.state, list) or
712 not all(
713 isinstance(i, (list, tuple)) and len(i) == 2
714 for i in self.state)):
715 # Discard.
716 self._state_need_to_be_saved = True
717 self.state = []
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000718
maruel@chromium.org770993b2012-12-11 17:16:48 +0000719 # Ensure that all files listed in the state still exist and add new ones.
720 previous = set(filename for filename, _ in self.state)
721 if len(previous) != len(self.state):
722 logging.warn('Cache state is corrupted, found duplicate files')
723 self._state_need_to_be_saved = True
724 self.state = []
725
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000726 added = 0
727 for filename in os.listdir(self.cache_dir):
728 if filename == self.STATE_FILE:
729 continue
730 if filename in previous:
731 previous.remove(filename)
732 continue
733 # An untracked file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000734 if not RE_IS_SHA1.match(filename):
735 logging.warn('Removing unknown file %s from cache', filename)
736 os.remove(self.path(filename))
maruel@chromium.org770993b2012-12-11 17:16:48 +0000737 continue
738 # Insert as the oldest file. It will be deleted eventually if not
739 # accessed.
740 self._add(filename, False)
741 logging.warn('Add unknown file %s to cache', filename)
742 added += 1
743
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000744 if added:
745 logging.warn('Added back %d unknown files', added)
maruel@chromium.org770993b2012-12-11 17:16:48 +0000746 if previous:
747 logging.warn('Removed %d lost files', len(previous))
748 # Set explicitly in case self._add() wasn't called.
749 self._state_need_to_be_saved = True
750 # Filter out entries that were not found while keeping the previous
751 # order.
752 self.state = [
753 (filename, size) for filename, size in self.state
754 if filename not in previous
755 ]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000756 self.trim()
757
def __enter__(self):
  """Enters the 'with' block; the object itself is the context value."""
  context_value = self
  return context_value
760
def __exit__(self, _exc_type, _exec_value, _traceback):
  """Trims the cache one last time, then logs the usage statistics.

  Nothing is returned, so an exception raised inside the 'with' block is not
  suppressed.
  """
  with Profiler('CleanupTrimming'):
    self.trim()

  # Sizes are kept in bytes and reported in kb.
  added_kb = sum(self._added) / 1024
  logging.info('%5d (%8dkb) added', len(self._added), added_kb)
  current_kb = sum(entry[1] for entry in self.state) / 1024
  logging.info('%5d (%8dkb) current', len(self.state), current_kb)
  removed_kb = sum(self._removed) / 1024
  logging.info('%5d (%8dkb) removed', len(self._removed), removed_kb)
  logging.info(' %8dkb free', self._free_disk / 1024)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000774
def remove_file_at_index(self, index):
  """Drops the entry at |index| from the state and deletes its file.

  A failure to delete the file from disk is logged and otherwise ignored; the
  entry is removed from the state in any case.
  """
  self._state_need_to_be_saved = True
  filename, size = self.state.pop(index)
  # Every following entry shifted by one, so the lookup has to be rebuilt. If
  # it was already stale, |filename| may not be present in it yet.
  self._lookup_is_stale = True
  self._lookup.pop(filename, None)
  self._removed.append(size)
  try:
    os.remove(self.path(filename))
  except OSError as error:
    logging.error('Error attempting to delete a file\n%s', error)
788
def remove_lru_file(self):
  """Removes the least recently used file, i.e. the first state entry."""
  oldest_index = 0
  self.remove_file_at_index(oldest_index)
792
def trim(self):
  """Evicts least recently used files until every cache policy is respected.

  The policies are applied in this order: maximum total size, maximum number
  of items, minimum free disk space. A policy whose value is falsy is skipped.
  The state is saved at the end.
  """
  policies = self.policies

  # Ensure maximum cache size. A running total is kept instead of summing the
  # whole state again after each eviction, which was quadratic in the number
  # of entries.
  if policies.max_cache_size and self.state:
    total_size = sum(size for _, size in self.state)
    while self.state and total_size > policies.max_cache_size:
      # remove_lru_file() always evicts the first entry.
      total_size -= self.state[0][1]
      self.remove_lru_file()

  # Ensure maximum number of items in the cache.
  if policies.max_items and self.state:
    while len(self.state) > policies.max_items:
      self.remove_lru_file()

  # Ensure enough free space. The free space is measured again after each
  # eviction.
  self._free_disk = get_free_space(self.cache_dir)
  while (
      policies.min_free_space and
      self.state and
      self._free_disk < policies.min_free_space):
    self.remove_lru_file()
    self._free_disk = get_free_space(self.cache_dir)

  self.save()
815
def retrieve(self, priority, item, size):
  """Makes sure |item| is in the cache, requesting it from the remote if not.

  An entry already in the cache is checked with valid_file() against |size|.
  A valid entry becomes the most recently used one; an invalid entry is
  removed and requested again. An item that is already pending is not
  requested twice.
  """
  assert '/' not in item
  path = self.path(item)
  self._update_lookup()
  index = self._lookup.get(item)

  if index is not None:
    if valid_file(path, size):
      assert index < len(self.state)
      # Was already in cache. Update its LRU value by putting it at the end.
      self._state_need_to_be_saved = True
      self._lookup_is_stale = True
      self.state.append(self.state.pop(index))
      return
    self.remove_file_at_index(index)

  if item in self._pending_queue:
    # Already pending. The same object could be referenced multiple times.
    return
  self.remote.add_item(priority, item, path, size)
  self._pending_queue.add(item)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000845
def add(self, filepath, obj):
  """Forcibly adds the file at |filepath| to the cache under the name |obj|.

  Nothing is done when |obj| is already in the cache.
  """
  self._update_lookup()
  if obj in self._lookup:
    return
  link_file(self.path(obj), filepath, HARDLINK)
  self._add(obj, True)
852
def path(self, item):
  """Returns the location of |item| inside the cache directory."""
  cache_dir = self.cache_dir
  return os.path.join(cache_dir, item)
856
def save(self):
  """Saves the LRU ordering to the state file, if it changed.

  The state is written as compact JSON. Nothing is written when no change is
  pending.
  """
  if not self._state_need_to_be_saved:
    return
  # The handle used to be left for the garbage collector to close; close it
  # deterministically so the data is flushed before the flag is reset.
  # json.dump() emits str and its compact ASCII output contains no newline, so
  # text mode writes the same bytes as binary mode did.
  with open(self.state_file, 'w') as f:
    json.dump(self.state, f, separators=(',', ':'))
  self._state_need_to_be_saved = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000862
def wait_for(self, items):
  """Waits until at least one of |items| is present in the cache.

  Returns the first of |items| that is already cached, if any. Otherwise
  results are taken from the remote one at a time, each one being added to
  the cache, until one of them is in |items|; that item is returned. Returns
  None if the pending queue is drained without finding any.
  """
  # Flush items already present.
  self._update_lookup()
  already_cached = [i for i in items if i in self._lookup]
  if already_cached:
    return already_cached[0]

  assert all(i in self._pending_queue for i in items), (
      items, self._pending_queue)
  # Note that:
  #   len(self._pending_queue) ==
  #       ( len(self.remote._workers) - self.remote._ready +
  #         len(self._remote._queue) + len(self._remote.done))
  # There is no lock-free way to verify that.
  while self._pending_queue:
    fetched = self.remote.get_one_result()
    self._pending_queue.remove(fetched)
    self._add(fetched, True)
    if fetched in items:
      return fetched
  return None
887
def _add(self, item, at_end):
  """Records |item|, a file present in the cache directory, in the state.

  If |at_end| is True, the item becomes the most recently used entry and
  self._lookup is updated in place. If |at_end| is False, the item is
  inserted as the oldest entry, self._lookup becomes inconsistent and
  self._update_lookup() must be called.
  """
  size = os.path.getsize(self.path(item))
  self._added.append(size)
  self._state_need_to_be_saved = True
  entry = (item, size)
  if not at_end:
    # Every existing index shifts by one.
    self._lookup_is_stale = True
    self.state.insert(0, entry)
    return
  self.state.append(entry)
  self._lookup[item] = len(self.state) - 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000903
def _update_lookup(self):
  """Rebuilds the filename -> index map from the state when it is stale."""
  if not self._lookup_is_stale:
    return
  lookup = {}
  for index, (filename, _) in enumerate(self.state):
    lookup[filename] = index
  self._lookup = lookup
  self._lookup_is_stale = False
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000909
910
class IsolatedFile(object):
  """Represents a single parsed .isolated file."""

  def __init__(self, obj_hash):
    """|obj_hash| is really the sha-1 of the file."""
    logging.debug('IsolatedFile(%s)', obj_hash)
    self.obj_hash = obj_hash
    # Set once all the left-side of the tree is parsed. 'Tree' here means the
    # .isolate and all the .isolated files recursively included by it with
    # 'includes' key. The order of each sha-1 in 'includes', each representing
    # a .isolated file in the hash table, is important, as the later ones are
    # not processed until the first ones are retrieved and read.
    self.can_fetch = False

    # Raw data.
    self.data = {}
    # An IsolatedFile instance, one per object in self.includes.
    self.children = []

    # Set once the .isolated file is loaded.
    self._is_parsed = False
    # Set once the files are fetched.
    self.files_fetched = False

  def load(self, content):
    """Parses |content| with load_isolated() and stores the resulting data.

    One child IsolatedFile is created per entry of the 'includes' key. Must be
    called only once per instance.
    """
    logging.debug('IsolatedFile.load(%s)', self.obj_hash)
    assert not self._is_parsed
    self.data = load_isolated(content)
    includes = self.data.get('includes', [])
    self.children = [IsolatedFile(i) for i in includes]
    self._is_parsed = True

  def fetch_files(self, cache, files):
    """Adds files in this .isolated file not present in |files| dictionary.

    Preemptively requests from |cache| each added file that has a 'h' key.
    Does nothing if this file is not loaded yet or was already processed.

    Note that |files| is modified by this function.
    """
    assert self.can_fetch
    if self.files_fetched or not self._is_parsed:
      return
    logging.debug('fetch_files(%s)', self.obj_hash)
    for filepath, properties in self.data.get('files', {}).items():
      # Root isolated has priority on the files being mapped. In particular,
      # overridden files must not be fetched.
      if filepath in files:
        continue
      files[filepath] = properties
      if 'h' in properties:
        # Preemptively request files.
        logging.debug('fetching %s', filepath)
        cache.retrieve(Remote.MED, properties['h'], properties['s'])
    self.files_fetched = True
965
966
class Settings(object):
  """Results of a completely parsed .isolated file."""

  def __init__(self):
    self.command = []
    self.files = {}
    self.read_only = None
    self.relative_cwd = None
    # The main .isolated file, an IsolatedFile instance.
    self.root = None

  def load(self, cache, root_isolated_hash):
    """Loads the .isolated and all the included .isolated asynchronously.

    It enables support for "included" .isolated files. They are processed in
    strict order but fetched asynchronously from the cache. This is important
    so that a file in an included .isolated file that is overridden by an
    embedding .isolated file is not fetched needlessly. The includes are
    fetched in one pass and the files are fetched as soon as all the ones on
    the left-side of the tree were fetched.

    The prioritization is very important here for nested .isolated files.
    'includes' have the highest priority and the algorithm is optimized for
    both deep and wide trees. A deep one is a long link of .isolated files
    referenced one at a time by one item in 'includes'. A wide one has a large
    number of 'includes' in a single .isolated file. 'left' is defined as an
    included .isolated file earlier in the 'includes' list. So the order of
    the elements in 'includes' is important.

    Raises ConfigError if the same .isolated file is included more than once
    in the tree.
    """
    self.root = IsolatedFile(root_isolated_hash)
    cache.retrieve(Remote.HIGH, root_isolated_hash, UNKNOWN_FILE_SIZE)
    pending = {root_isolated_hash: self.root}
    # Keeps every hash requested so far to refuse recursive includes. Each
    # included hash must be recorded here: when only the root was, a cycle
    # that did not go through the root was never detected and this function
    # looped forever.
    retrieved = set([root_isolated_hash])

    def update_self(node):
      node.fetch_files(cache, self.files)
      # Grabs properties. The first node processed that defines a property
      # wins.
      if not self.command and node.data.get('command'):
        self.command = node.data['command']
      if self.read_only is None and node.data.get('read_only') is not None:
        self.read_only = node.data['read_only']
      if (self.relative_cwd is None and
          node.data.get('relative_cwd') is not None):
        self.relative_cwd = node.data['relative_cwd']

    def traverse_tree(node):
      if node.can_fetch:
        if not node.files_fetched:
          update_self(node)
        will_break = False
        for i in node.children:
          if not i.can_fetch:
            if will_break:
              break
            # Automatically mark the first one as fetchable.
            i.can_fetch = True
            will_break = True
          traverse_tree(i)

    while pending:
      item_hash = cache.wait_for(pending)
      item = pending.pop(item_hash)
      # Close the handle right away instead of leaking it until it is garbage
      # collected.
      with open(cache.path(item_hash), 'r') as f:
        item.load(f.read())
      if item_hash == root_isolated_hash:
        # It's the root item.
        item.can_fetch = True

      for new_child in item.children:
        h = new_child.obj_hash
        if h in retrieved:
          raise ConfigError('IsolatedFile %s is retrieved recursively' % h)
        retrieved.add(h)
        pending[h] = new_child
        cache.retrieve(Remote.HIGH, h, UNKNOWN_FILE_SIZE)

      # Traverse the whole tree to see if files can now be fetched.
      traverse_tree(self.root)

    def check(n):
      return all(check(x) for x in n.children) and n.files_fetched
    assert check(self.root)
    self.relative_cwd = self.relative_cwd or ''
    self.read_only = self.read_only or False
1047 self.read_only = self.read_only or False
1048
1049
def create_directories(base_directory, files):
  """Creates the directory structure needed by the given list of files.

  |files| is a sized iterable of relative file paths; only their directory
  part is used. Every directory, along with all its parents, is created under
  |base_directory|, which must already exist. Directories that already exist
  are left as is, so this can be called on a partially populated tree.
  """
  logging.debug('create_directories(%s, %d)', base_directory, len(files))
  # Creates the tree of directories to create.
  directories = set()
  for f in files:
    item = os.path.dirname(f)
    # Walk up to the top. Once a directory is known, so are all its parents.
    while item and item not in directories:
      directories.add(item)
      item = os.path.dirname(item)
  # Sorting guarantees a parent is created before its children.
  for d in sorted(directories):
    path = os.path.join(base_directory, d)
    if not os.path.isdir(path):
      os.mkdir(path)
1062
1063
def create_links(base_directory, files):
  """Creates any links needed by the given set of files.

  |files| is an iterable of (relative path, properties) pairs. A symlink is
  created under |base_directory| for each entry whose properties have a 'l'
  key, which is the link destination. When the properties also have a 'm'
  key, it is applied to the link itself with os.lchmod() on platforms that
  provide it.
  """
  lchmod = getattr(os, 'lchmod', None)
  for filepath, properties in files:
    # The key is 'l', the same one read below. This used to test for 'link',
    # so no link was ever created.
    if 'l' not in properties:
      continue
    outfile = os.path.join(base_directory, filepath)
    # symlink doesn't exist on Windows. So the 'l' property should never be
    # specified for windows .isolated file.
    os.symlink(properties['l'], outfile)  # pylint: disable=E1101
    if lchmod and 'm' in properties:
      lchmod(outfile, properties['m'])
1077
1078
def setup_commands(base_directory, cwd, cmd):
  """Correctly adjusts and then returns the required working directory
  and command needed to run the test.

  |cwd| must be relative; it is joined to |base_directory| and created if
  missing. The first item of |cmd| is modified in place, then |cmd| goes
  through fix_python_path(). Returns the (cwd, cmd) pair.
  """
  assert not os.path.isabs(cwd), 'The cwd must be a relative path, got %s' % cwd
  absolute_cwd = os.path.join(base_directory, cwd)
  if not os.path.isdir(absolute_cwd):
    os.makedirs(absolute_cwd)

  # Ensure paths are correctly separated on windows.
  executable = cmd[0]
  cmd[0] = executable.replace('/', os.path.sep)
  return absolute_cwd, fix_python_path(cmd)
1093
1094
def generate_remaining_files(files):
  """Generates a dictionary of all the remaining files to be downloaded.

  |files| is an iterable of (filepath, properties) pairs. The result maps each
  'h' value to the list of pairs referencing it, in iteration order. Entries
  without a 'h' key are ignored.
  """
  by_hash = {}
  for filepath, props in files:
    if 'h' not in props:
      continue
    digest = props['h']
    if digest not in by_hash:
      by_hash[digest] = []
    by_hash[digest].append((filepath, props))
  return by_hash
1103
1104
def download_test_data(isolated_hash, target_directory, remote):
  """Downloads the dependencies to the given directory.

  The directory is created if missing. Nothing is run; the command to run and
  the directory to run it from are printed instead. Returns 1 if the .isolated
  file has no command, 0 otherwise.
  """
  if not os.path.exists(target_directory):
    os.makedirs(target_directory)

  settings = Settings()
  no_cache = NoCache(target_directory, Remote(remote))

  # Download all the isolated files.
  with Profiler('GetIsolateds'):
    settings.load(no_cache, isolated_hash)

  if not settings.command:
    sys.stderr.write('No command to run\n')
    return 1

  with Profiler('GetRest'):
    create_directories(target_directory, settings.files)
    create_links(target_directory, settings.files.items())

    cwd, cmd = setup_commands(
        target_directory, settings.relative_cwd, list(settings.command))

    remaining = generate_remaining_files(settings.files.items())

    # Now block on the remaining files to be downloaded and mapped.
    logging.info('Retrieving remaining files')
    last_update = time.time()
    while remaining:
      obj = no_cache.wait_for(remaining)
      files = remaining.pop(obj)
      last_index = len(files) - 1

      for i, (filepath, properties) in enumerate(files):
        outfile = os.path.join(target_directory, filepath)
        logging.info(no_cache.path(obj))

        # The downloaded object is copied for every file mapping it but the
        # last one, which takes the object itself.
        if i == last_index:
          os.rename(no_cache.path(obj), outfile)
        else:
          shutil.copyfile(no_cache.path(obj), outfile)

        if 'm' in properties:
          # It's not set on Windows.
          os.chmod(outfile, properties['m'])

      if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
        logging.info('%d files remaining...', len(remaining))
        last_update = time.time()

  print('.isolated files successfully downloaded and setup in %s' %
        target_directory)
  print('To run this test please run the command %s from the directory %s' %
        (cmd, cwd))

  return 0
1160
1161
def run_tha_test(isolated_hash, cache_dir, remote, policies):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable.

  |isolated_hash| is either the sha-1 of the .isolated file to fetch or, when
  it does not look like a sha-1, the path to a local .isolated file.

  Returns 1 if there is no command to run, otherwise the exit code of the
  command. The temporary directory is deleted in all cases.
  """
  settings = Settings()
  with Cache(cache_dir, Remote(remote), policies) as cache:
    outdir = make_temp_dir('run_tha_test', cache_dir)
    try:
      # Initiate all the files download.
      with Profiler('GetIsolateds'):
        # Optionally support local files.
        if not RE_IS_SHA1.match(isolated_hash):
          # Adds it in the cache. While not strictly necessary, this simplifies
          # the rest.
          # The file is hardlinked as is into the cache, so hash its exact
          # bytes: read in binary mode, and close the handle right away.
          with open(isolated_hash, 'rb') as f:
            h = hashlib.sha1(f.read()).hexdigest()
          cache.add(isolated_hash, h)
          isolated_hash = h
        settings.load(cache, isolated_hash)

      if not settings.command:
        sys.stderr.write('No command to run\n')
        return 1

      with Profiler('GetRest'):
        create_directories(outdir, settings.files)
        create_links(outdir, settings.files.items())
        remaining = generate_remaining_files(settings.files.items())

        # Do bookkeeping while files are being downloaded in the background.
        cwd, cmd = setup_commands(outdir, settings.relative_cwd,
                                  settings.command[:])

        # Now block on the remaining files to be downloaded and mapped.
        logging.info('Retrieving remaining files')
        last_update = time.time()
        while remaining:
          obj = cache.wait_for(remaining)
          for filepath, properties in remaining.pop(obj):
            outfile = os.path.join(outdir, filepath)
            link_file(outfile, cache.path(obj), HARDLINK)
            if 'm' in properties:
              # It's not set on Windows.
              os.chmod(outfile, properties['m'])

          if time.time() - last_update > DELAY_BETWEEN_UPDATES_IN_SECS:
            logging.info('%d files remaining...', len(remaining))
            last_update = time.time()

      if settings.read_only:
        make_writable(outdir, True)
      logging.info('Running %s, cwd=%s', cmd, cwd)

      # TODO(csharp): This should be specified somewhere else.
      # Add a rotating log file if one doesn't already exist.
      env = os.environ.copy()
      env.setdefault('RUN_TEST_CASES_LOG_FILE', RUN_TEST_CASES_LOG)
      try:
        with Profiler('RunTest'):
          return subprocess.call(cmd, cwd=cwd, env=env)
      except OSError:
        sys.stderr.write('Failed to run %s; cwd=%s\n' % (cmd, cwd))
        raise
    finally:
      rmtree(outdir)
1226
1227
def main():
  """Parses the command line, sets up logging, then maps or runs the test.

  With --download the files are only downloaded; otherwise the test is run
  from a temporary directory. Returns the process exit code.
  """
  parser = optparse.OptionParser(
      usage='%prog <options>', description=sys.modules[__name__].__doc__)
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Use multiple times')
  # NOTE(review): options.no_run is not read anywhere in this function.
  parser.add_option('--no-run', action='store_true', help='Skip the run part')

  download_group = optparse.OptionGroup(parser, 'Download')
  download_group.add_option(
      '--download', metavar='DEST',
      help='Downloads files to DEST and returns without running, instead of '
           'downloading and then running from a temporary directory.')
  parser.add_option_group(download_group)

  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      metavar='FILE',
      help='File/url describing what to map or run')
  # TODO(maruel): Remove once not used anymore.
  source_group.add_option(
      '-m', '--manifest', dest='isolated', help=optparse.SUPPRESS_HELP)
  source_group.add_option(
      '-H', '--hash',
      help='Hash of the .isolated to grab from the hash table')
  source_group.add_option(
      '-r', '--remote', metavar='URL',
      default=
          'https://isolateserver.appspot.com/content/retrieve/default-gzip/',
      help='Remote where to get the items. Defaults to %default')
  parser.add_option_group(source_group)

  cache_group = optparse.OptionGroup(parser, 'Cache management')
  cache_group.add_option(
      '--cache',
      default='cache',
      metavar='DIR',
      help='Cache directory, default=%default')
  cache_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=20*1024*1024*1024,
      help='Trim if the cache gets larger than this value, default=%default')
  cache_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=1*1024*1024*1024,
      help='Trim if disk free space becomes lower than this value, '
           'default=%default')
  cache_group.add_option(
      '--max-items',
      type='int',
      metavar='NNN',
      default=100000,
      help='Trim if more than this number of items are in the cache '
           'default=%default')
  parser.add_option_group(cache_group)

  options, args = parser.parse_args()
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  level = levels[min(2, options.verbose)]

  # The console verbosity follows -v while the rotating log file always gets
  # everything, so the root logger itself must let DEBUG through.
  root_logger = logging.getLogger()

  logging_console = logging.StreamHandler()
  logging_console.setFormatter(logging.Formatter(
      '%(levelname)5s %(module)15s(%(lineno)3d): %(message)s'))
  logging_console.setLevel(level)
  root_logger.addHandler(logging_console)

  logging_rotating_file = logging.handlers.RotatingFileHandler(
      RUN_ISOLATED_LOG_FILE,
      maxBytes=10 * 1024 * 1024, backupCount=5)
  logging_rotating_file.setLevel(logging.DEBUG)
  logging_rotating_file.setFormatter(logging.Formatter(
      '%(asctime)s %(levelname)-8s %(module)15s(%(lineno)3d): %(message)s'))
  root_logger.addHandler(logging_rotating_file)

  root_logger.setLevel(logging.DEBUG)

  # Usage errors are logged too, since parser.error() exits the process.
  if bool(options.isolated) == bool(options.hash):
    message = 'One and only one of --isolated or --hash is required.'
    logging.debug(message)
    parser.error(message)
  if args:
    message = 'Unsupported args %s' % ' '.join(args)
    logging.debug(message)
    parser.error(message)

  options.cache = os.path.abspath(options.cache)
  policies = CachePolicies(
      options.max_cache_size, options.min_free_space, options.max_items)

  isolated = options.isolated or options.hash
  if options.download:
    return download_test_data(isolated, options.download, options.remote)

  try:
    return run_tha_test(isolated, options.cache, options.remote, policies)
  except Exception as e:
    # Make sure any exception is logged.
    logging.exception(e)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001332
1333
if __name__ == '__main__':
  # The value returned by main() is the exit code of the process.
  exit_code = main()
  sys.exit(exit_code)