blob: 66083726f03384479a44a8eb6c2b02ca9fbb3130 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
nodir55be77b2016-05-03 09:39:57 -07008Despite name "run_isolated", can run a generic non-isolated command specified as
9args.
10
11If input isolated hash is provided, fetches it, creates a tree of hard links,
12appends args to the command in the fetched isolated and runs it.
13To improve performance, keeps a local cache.
14The local cache can safely be deleted.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050015
nodirbe642ff2016-06-09 15:51:51 -070016Any ${EXECUTABLE_SUFFIX} on the command line will be replaced with ".exe" string
17on Windows and "" on other platforms.
18
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050019Any ${ISOLATED_OUTDIR} on the command line will be replaced by the location of a
20temporary directory upon execution of the command specified in the .isolated
21file. All content written to this directory will be uploaded upon termination
22and the .isolated file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070023
24Any ${SWARMING_BOT_FILE} on the command line will be replaced by the value of
25the --bot-file parameter. This file is used by a swarming bot to communicate
26state of the host to tasks. It is written to by the swarming bot's
27on_before_task() hook in the swarming server's custom bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028"""
29
maruele2f2cb82016-07-13 14:41:03 -070030__version__ = '0.8.2'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000031
maruel064c0a32016-04-05 11:47:15 -070032import base64
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000033import logging
34import optparse
35import os
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000036import sys
37import tempfile
maruel064c0a32016-04-05 11:47:15 -070038import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000040from third_party.depot_tools import fix_encoding
41
Vadim Shtayura6b555c12014-07-23 16:22:18 -070042from utils import file_path
maruel12e30012015-10-09 11:55:35 -070043from utils import fs
maruel064c0a32016-04-05 11:47:15 -070044from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040045from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040046from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050047from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000048from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000049from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000050
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080051import auth
nodirbe642ff2016-06-09 15:51:51 -070052import cipd
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000053import isolateserver
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000054
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000055
# Absolute path to this file (can be None if running from zip on Mac).
THIS_FILE_PATH = os.path.abspath(
    __file__.decode(sys.getfilesystemencoding())) if __file__ else None

# Directory that contains this file (might be inside zip package). This is
# derived from THIS_FILE_PATH rather than from __file__ so that a missing
# __file__ yields None instead of failing on __file__.decode().
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None

# Directory that contains currently running script file.
if zip_package.get_main_script_path():
  MAIN_DIR = os.path.dirname(
      os.path.abspath(zip_package.get_main_script_path()))
else:
  # This happens when 'import run_isolated' is executed at the python
  # interactive prompt, in that case __file__ is undefined.
  MAIN_DIR = None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000072
maruele2f2cb82016-07-13 14:41:03 -070073
# Magic variables that can be found in the isolate task command line.
# process_command() substitutes them before the command is run.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Use short names for temporary directories. This is driven by Windows, which
# imposes a relatively short maximum path length of 260 characters, often
# referred to as MAX_PATH. It is relatively easy to create files with a longer
# path length. A use case is recursive dependency trees, like npm packages.
#
# It is recommended to start the script with a `root_dir` as short as
# possible.
# - ir stands for isolated_run
# - io stands for isolated_out
# - it stands for isolated_tmp
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
101
102
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this script and everything it depends on.

  When |executable| is true, run_isolated.py is stored under the name
  __main__.py so that python can execute the resulting zip package directly.
  """
  # Creating a zip package while running from inside another zip package is
  # not supported, and there is probably no need for it either.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  archive_name = '__main__.py' if executable else None
  pkg = zip_package.ZipPackage(root=BASE_DIR)
  pkg.add_python_file(THIS_FILE_PATH, archive_name)
  # Sibling modules first, then whole dependency directories.
  sibling_modules = (
      'isolated_format.py', 'isolateserver.py', 'auth.py', 'cipd.py')
  for module_name in sibling_modules:
    pkg.add_python_file(os.path.join(BASE_DIR, module_name))
  for dir_name in ('third_party', 'utils'):
    pkg.add_directory(os.path.join(BASE_DIR, dir_name))
  return pkg
123
124
def make_temp_dir(prefix, root_dir=None):
  """Creates a fresh temporary directory and returns its path as unicode.

  The directory is created in the system temporary directory when root_dir is
  not given, or when root_dir lives on the same file system as the system
  temporary directory. Otherwise it is created under root_dir.

  OSX is the exception: root_dir is mandatory and always used there, because
  creating hardlinks in $TMPDIR is dangerous on OSX.
  /System/Library/LaunchDaemons/com.apple.bsd.dirhelper.plist runs every day at
  3:35am and deletes all files older than 3 days in $TMPDIR. Hardlinks do not
  get their inode modification time updated, so they tend to look old and end
  up deleted.
  """
  system_tmp = unicode(tempfile.gettempdir())
  if sys.platform == 'darwin':
    # Never fall back to $TMPDIR on OSX.
    assert root_dir, 'It is unsafe to create hardlinks in $TMPDIR'
    parent = root_dir
  elif root_dir and not file_path.is_same_filesystem(root_dir, system_tmp):
    parent = root_dir
  else:
    # Let tempfile pick its default location.
    parent = None
  return unicode(tempfile.mkdtemp(prefix=prefix, dir=parent))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000146
147
def change_tree_read_only(rootdir, read_only):
  """Applies the requested read-only policy to the tree rooted at rootdir.

  read_only selects the policy:
    - 0 or None: anything can be modified.
    - 1: files are marked read only, directories are not.
    - 2: files and directories are marked read only.

  Raises:
    ValueError if read_only is any other value.
  """
  if read_only not in (0, 1, 2, None):
    raise ValueError(
        'change_tree_read_only(%s, %s): Unknown flag %s' %
        (rootdir, read_only, read_only))

  if read_only == 2:
    # Files and directories (except on Windows) are marked read only. This
    # inhibits modifying, creating or deleting files in the test directory,
    # except on Windows where creating and deleting files is still possible.
    file_path.make_tree_read_only(rootdir)
    return

  if read_only == 1:
    # Only files are marked read only. Modifying files is inhibited, but
    # creating or deleting files is still possible.
    file_path.make_tree_files_read_only(rootdir)
    return

  # read_only is 0 or None: anything can be modified.
  # TODO(maruel): This is currently dangerous as long as DiskCache.touch()
  # is not yet changed to verify the hash of the content of the files it is
  # looking at, so that if a test modifies an input file, the file must be
  # deleted.
  file_path.make_tree_writeable(rootdir)
174
175
def process_command(command, out_dir, bot_file):
  """Expands the magic ${...} variables in every argument of a command line.

  Returns a new list; |command| itself is not modified.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is used in |command| but out_dir is not
    provided. A ${SWARMING_BOT_FILE} without bot_file only logs a warning and
    is left as is.
  """
  def expand(arg):
    arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
    has_path_substitution = False

    if ISOLATED_OUTDIR_PARAMETER in arg:
      if not out_dir:
        raise ValueError(
            'output directory is requested in command, but not provided; '
            'please specify one')
      arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
      has_path_substitution = True

    if SWARMING_BOT_FILE_PARAMETER in arg:
      if not bot_file:
        logging.warning('SWARMING_BOT_FILE_PARAMETER found in command, but no '
                        'bot_file specified. Leaving parameter unchanged.')
      else:
        arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
        has_path_substitution = True

    if not has_path_substitution:
      return arg
    # Slashes are converted only when a path parameter was substituted, to
    # handle arguments like '${ISOLATED_OUTDIR}/foo/bar'.
    return arg.replace('/', os.sep)

  expanded = []
  for item in command:
    expanded.append(expand(item))
  return expanded
207
208
def run_command(command, cwd, tmp_dir, hard_timeout, grace_period):
  """Runs the command and waits for it, enforcing the hard timeout.

  The child gets a copy of the current environment where the platform's
  temporary directory variable (TMPDIR on OSX, TEMP on Windows, TMP elsewhere)
  points to tmp_dir.

  Arguments:
    command: the command to run, as accepted by subprocess42.Popen.
    cwd: working directory of the child process.
    tmp_dir: unicode path exported to the child as its temporary directory.
    hard_timeout: passed to the first proc.wait(); a falsy value is passed as
        None (presumably "no timeout" -- confirm in subprocess42).
    grace_period: passed to proc.wait() after terminate() was called; a falsy
        value is passed as None.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)' % (command, cwd))

  env = os.environ.copy()
  if sys.platform == 'darwin':
    env['TMPDIR'] = tmp_dir.encode(sys.getfilesystemencoding())
  elif sys.platform == 'win32':
    env['TEMP'] = tmp_dir.encode(sys.getfilesystemencoding())
  else:
    env['TMP'] = tmp_dir.encode(sys.getfilesystemencoding())
  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list is used so the nested handler can record the signal by mutating
    # it instead of rebinding a local.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows where things can go
      # wrong.
      def handler(signum, _frame):
        # Only the first signal received once the child exists is converted
        # into a TimeoutExpired, so it goes through the same termination path
        # as a hard timeout.
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          # Either a real timeout or a signal converted by handler(); only the
          # former is reported as a hard timeout.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - process exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
          logging.info('Waiting for proces exit')
          exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist or a dependent library is missing>\n'
          '<Check for missing .so/.dll in the .isolate or GN file>\n'
          '<Command: %s>\n' % command)
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  # The exit code is also logged as an unsigned 32 bits hexadecimal value.
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700285
286
def fetch_and_measure(isolated_hash, storage, cache, outdir):
  """Fetches an isolated tree into outdir and measures the operation.

  Returns:
    tuple(bundle, stats) where bundle is what isolateserver.fetch_isolated()
    returned and stats is a dict with the elapsed time, the initial state of
    the cache and the packed, base64 encoded lists of cold and hot items.
  """
  began = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash, storage=storage, cache=cache, outdir=outdir)
  stats = {}
  stats['duration'] = time.time() - began
  stats['initial_number_items'] = cache.initial_number_items
  stats['initial_size'] = cache.initial_size
  # Cold items were added to the cache by this fetch; hot items were linked
  # without having been added.
  cold_items = sorted(cache.added)
  stats['items_cold'] = base64.b64encode(large.pack(cold_items))
  hot_items = sorted(set(cache.linked) - set(cache.added))
  stats['items_hot'] = base64.b64encode(large.pack(hot_items))
  return bundle, stats
303
304
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Archives the content of out_dir to storage, then deletes out_dir.

  The upload only happens when out_dir exists and is not empty. The deletion
  is skipped when leak_temp_dir is set.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the
      isolated server, or None when nothing was archived.
    - success: False if something occurred that means that the task must
      forcibly be considered a failure, e.g. out_dir could not be deleted
      because zombie processes were left behind.
    - stats: uploading stats.
  """
  outputs_ref = None
  cold_sizes = []
  hot_sizes = []
  began = time.time()

  # A .isolated file is generated out of out_dir only if files were written
  # in it.
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        archived, cold_files, hot_files = (
            isolateserver.archive_files_to_storage(storage, [out_dir], None))
        outputs_ref = {
          'isolated': archived[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold_sizes = sorted(item.size for item in cold_files)
        hot_sizes = sorted(item.size for item in hot_files)
      except isolateserver.Aborted:
        # A SIGTERM signal was received while uploading data. There are 2
        # causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  deleted_ok = False
  try:
    must_delete = not leak_temp_dir and fs.isdir(out_dir)
    if must_delete and not file_path.rmtree(out_dir):
      logging.error('Had difficulties removing out_dir %s', out_dir)
    else:
      deleted_ok = True
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)

  upload_stats = {
    'duration': time.time() - began,
    'items_cold': base64.b64encode(large.pack(cold_sizes)),
    'items_hot': base64.b64encode(large.pack(hot_sizes)),
  }
  return outputs_ref, deleted_ok, upload_stats
maruela9cfd6f2015-09-15 11:03:15 -0700364
365
def _remove_dir_or_fail_task(path, description, result):
  """Deletes directory |path| if it exists; fails the task if it cannot.

  When the deletion fails, a message naming the |description| directory is
  written to stderr and a successful exit code in |result| is turned into 1.
  """
  if not fs.isdir(path):
    return
  try:
    success = file_path.rmtree(path)
  except OSError as e:
    logging.error('Failure with %s', e)
    success = False
  if not success:
    sys.stderr.write(
        'Failed to delete the %s directory, forcibly failing\n'
        'the task because of it. No zombie process can outlive a\n'
        'successful task run and still be marked as successful.\n'
        'Fix your stuff.\n' % description)
    if result['exit_code'] == 0:
      result['exit_code'] = 1


def map_and_run(
    command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
    hard_timeout, grace_period, bot_file, extra_args, install_packages_fn):
  """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  Exceptions raised while preparing or running the command are not
  propagated; they are logged and recorded in the 'internal_failure' entry of
  the returned dict.

  Returns:
    dict of metadata about the result, with the keys 'duration', 'exit_code',
    'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref' and
    'version'.
  """
  assert bool(command) ^ bool(isolated_hash)
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
      # Filled in below, only for the steps that were run:
      # 'cipd': {
      #   'duration': 0.,
      #   'get_client_duration': 0.,
      # },
      # 'isolated': {
      #   'download': {
      #     'duration': 0.,
      #     'initial_number_items': 0,
      #     'initial_size': 0,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      #   'upload': {
      #     'duration': 0.,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      # },
    },
    'outputs_ref': None,
    'version': 5,
  }

  if root_dir:
    file_path.ensure_tree(root_dir, 0o700)
  else:
    root_dir = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
  # See comment for these constants.
  run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
  # The output directory only exists when there is a storage to upload to.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
  cwd = run_dir

  try:
    cipd_stats = install_packages_fn(run_dir)
    if cipd_stats:
      result['stats']['cipd'] = cipd_stats

    if isolated_hash:
      isolated_stats = result['stats'].setdefault('isolated', {})
      bundle, isolated_stats['download'] = fetch_and_measure(
          isolated_hash=isolated_hash,
          storage=storage,
          cache=cache,
          outdir=run_dir)
      if not bundle.command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<The .isolated doesn\'t declare any command to run!>\n'
            '<Check your .isolate for missing \'command\' variable>\n')
        if os.environ.get('SWARMING_TASK_ID'):
          # Give an additional hint when running as a swarming task.
          sys.stderr.write('<This occurs at the \'isolate\' step>\n')
        result['exit_code'] = 1
        # The finally clause below still cleans up before returning.
        return result

      change_tree_read_only(run_dir, bundle.read_only)
      cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
      command = bundle.command + extra_args

    command = tools.fix_python_path(command)
    command = process_command(command, out_dir, bot_file)
    file_path.ensure_command_has_abs_path(command, cwd)

    sys.stdout.flush()
    start = time.time()
    try:
      result['exit_code'], result['had_hard_timeout'] = run_command(
          command, cwd, tmp_dir, hard_timeout, grace_period)
    finally:
      # Record the duration even if run_command() raised.
      result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    try:
      if leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks *.exe file). Examine out_dir only
        # after that call completes (since child processes may write to
        # out_dir too and we need to wait for them to finish).
        _remove_dir_or_fail_task(run_dir, 'run', result)
        _remove_dir_or_fail_task(tmp_dir, 'temporary', result)

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(storage, out_dir, leak_temp_dir))
        # Only look at |success| when delete_and_upload() actually ran;
        # otherwise there is nothing to check and the name would be unbound.
        if not success and result['exit_code'] == 0:
          result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500512
513
def run_tha_test(
    command, isolated_hash, storage, cache, leak_temp_dir, result_json,
    root_dir, hard_timeout, grace_period, bot_file, extra_args,
    install_packages_fn):
  """Runs an executable and records execution metadata.

  Exactly one of command or isolated_hash must be specified.

  With isolated_hash, the dependencies are downloaded in the cache and
  hardlinked into a temporary directory, then the command specified in the
  .isolated is run.

  A temporary directory is created to hold the output files. Its content is
  uploaded back to |storage| packaged as a .isolated file.

  Arguments:
    command: the command to run, a list of strings. Mutually exclusive with
             isolated_hash.
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
                   The command specified in the .isolated is executed.
                   Mutually exclusive with command argument.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.
    root_dir: path to the directory to use to create the temporary directory.
              If not specified, a random temporary directory is created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    bot_file: value substituted for ${SWARMING_BOT_FILE} in the command line.
    extra_args: optional arguments to add to the command stated in the .isolate
                file. Ignored if isolated_hash is empty.
    install_packages_fn: function (dir) => cipd_stats. Installs packages.

  Returns:
    Process exit code that should be used.
  """
  assert bool(command) ^ bool(isolated_hash)
  extra_args = extra_args or []

  args_to_scan = command or extra_args
  if any(ISOLATED_OUTDIR_PARAMETER in arg for arg in args_to_scan):
    assert storage is not None, 'storage is None although outdir is specified'

  if result_json:
    # Dump a placeholder right away, in case we get killed before the real
    # result is known.
    placeholder = {
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'Was terminated before completion',
      'outputs_ref': None,
      'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  # The run_isolated exit code depends on whether result_json is used or not.
  result = map_and_run(
      command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
      hard_timeout, grace_period, bot_file, extra_args, install_packages_fn)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  internal_error = int(bool(result['internal_failure']))

  if result_json:
    # We've found tests to delete 'work' when quitting, causing an exception
    # here. Try to recreate the directory if necessary.
    file_path.ensure_tree(os.path.dirname(result_json))
    tools.write_json(result_json, result, dense=True)
    # Only return 1 if there was an internal error.
    return internal_error

  # Marshall into old-style inline output.
  outputs_ref = result['outputs_ref']
  if outputs_ref:
    data = {
      'hash': outputs_ref['isolated'],
      'namespace': outputs_ref['namespace'],
      'storage': outputs_ref['isolatedserver'],
    }
    sys.stdout.flush()
    marker = (
        '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
        tools.format_json(data, dense=True))
    print(marker)
    sys.stdout.flush()
  return result['exit_code'] or internal_error
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000603
604
def install_packages(
    run_dir, packages, service_url, client_package_name,
    client_version, cache_dir=None, timeout=None):
  """Installs CIPD packages under run_dir. Returns timing stats.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, dict {path: [(package_name, version)]}.
      Each path is joined to run_dir to form the site root.
    service_url (str): CIPD server url, e.g.
        "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
      Must be set despite the None default; this is asserted.
    timeout: max duration in seconds that this function can take.

  Returns:
    None when |packages| is empty, otherwise a dict with keys 'duration' and
    'get_client_duration', both in seconds.

  Raises:
    cipd.Error: if a package path resolves to a location outside of run_dir.
  """
  assert cache_dir
  if not packages:
    return None

  timeoutfn = tools.sliding_timeout(timeout)
  start = time.time()
  cache_dir = os.path.abspath(cache_dir)

  run_dir = os.path.abspath(run_dir)
  # run_dir with a trailing separator. A bare startswith(run_dir) would also
  # accept sibling directories such as "<run_dir>-other" reached via "../".
  run_dir_prefix = os.path.join(run_dir, '')

  get_client_start = time.time()
  client_manager = cipd.get_client(
      service_url, client_package_name, client_version, cache_dir,
      timeout=timeoutfn())
  with client_manager as client:
    get_client_duration = time.time() - get_client_start
    # Use a distinct loop variable so the |packages| argument is not rebound.
    for path, site_packages in sorted(packages.items()):
      site_root = os.path.abspath(os.path.join(run_dir, path))
      if site_root != run_dir and not site_root.startswith(run_dir_prefix):
        raise cipd.Error('Invalid CIPD package path "%s"' % path)

      # Do not clean site_root before installation because it may contain other
      # site roots.
      file_path.ensure_tree(site_root, 0o770)
      client.ensure(
          site_root, site_packages,
          cache_dir=os.path.join(cache_dir, 'cipd_internal'),
          timeout=timeoutfn())
      file_path.make_tree_files_read_only(site_root)

  total_duration = time.time() - start
  logging.info(
      'Installing CIPD client and packages took %d seconds', total_duration)

  return {
    'duration': total_duration,
    'get_client_duration': get_client_duration,
  }
nodirbe642ff2016-06-09 15:51:51 -0700658
659
def create_option_parser():
  """Builds the command line option parser for run_isolated.

  Registers the general options, the 'Data source' and 'Debugging' groups, and
  the options contributed by the isolateserver, cipd and auth modules.

  Returns:
    The configured parser, with 'cache' and 'cipd_cache' defaulting to
    'cache' and 'cipd_cache' respectively.
  """
  option_parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)

  # General options, registered directly on the parser.
  clean_help = (
      'Cleans the cache, trimming it necessary and remove corrupted items '
      'and returns without executing anything; use with -v to know what '
      'was done')
  option_parser.add_option('--clean', action='store_true', help=clean_help)

  json_help = (
      'dump output metadata to json file. When used, run_isolated returns '
      'non-zero only on internal failure')
  option_parser.add_option('--json', help=json_help)

  option_parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  option_parser.add_option(
      '--grace-period', type='float',
      help='Grace period between SIGTERM and SIGKILL')

  bot_file_help = (
      'Path to a file describing the state of the host. The content is '
      'defined by on_before_task() in bot_config.')
  option_parser.add_option('--bot-file', help=bot_file_help)

  # Where the isolated tree comes from.
  source_group = optparse.OptionGroup(option_parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(source_group)
  option_parser.add_option_group(source_group)

  # Options contributed by other modules; registration order is preserved.
  isolateserver.add_cache_options(option_parser)
  cipd.add_cipd_options(option_parser)

  # Switches that are only useful when debugging a run.
  leak_help = (
      'Deliberately leak isolate\'s temp dir for later examination. '
      'Default: %default')
  debugging_group = optparse.OptionGroup(option_parser, 'Debugging')
  debugging_group.add_option(
      '--leak-temp-dir', action='store_true', help=leak_help)
  debugging_group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  option_parser.add_option_group(debugging_group)

  auth.add_auth_options(option_parser)

  option_parser.set_defaults(cache='cache', cipd_cache='cipd_cache')
  return option_parser
708
709
def main(args):
  """Parses the command line, then runs the command or the isolated tree.

  Args:
    args: command line arguments, without the program name.

  Returns:
    The process exit code: 0 after a --clean run, 1 if a cipd.Error was
    raised, otherwise the value returned by run_tha_test().
  """
  parser = create_option_parser()
  options, args = parser.parse_args(args)

  cache = isolateserver.process_cache_options(options)
  if options.clean:
    # --clean only maintains the local cache; reject options that would imply
    # running something.
    conflicts = (
      (options.isolated, '--isolated'),
      (options.isolate_server, '--isolate-server'),
      (options.json, '--json'),
    )
    for value, flag in conflicts:
      if value:
        parser.error('Can\'t use %s with --clean.' % flag)
    cache.cleanup()
    return 0

  if not options.isolated and not args:
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    # NOTE(review): this is an exact-element match. If the placeholder can be
    # embedded inside a larger argument it is not caught here; confirm against
    # the code that performs the substitution.
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = unicode(os.path.abspath(options.root_dir))
  if options.json:
    options.json = unicode(os.path.abspath(options.json))

  cipd.validate_cipd_options(parser, options)

  def install_packages_fn(run_dir):
    # Package arguments are parsed lazily, only when installation is requested.
    return install_packages(
        run_dir, cipd.parse_package_args(options.cipd_packages),
        options.cipd_server, options.cipd_client_package,
        options.cipd_client_version, cache_dir=options.cipd_cache)

  # With --isolated the command comes from the isolated file and the
  # positional args are passed along as extra args only.
  command = [] if options.isolated else args

  def run(storage):
    # Single call site shared by the with-storage and no-storage paths.
    return run_tha_test(
        command, options.isolated, storage, cache, options.leak_temp_dir,
        options.json, options.root_dir, options.hard_timeout,
        options.grace_period, options.bot_file, args, install_packages_fn)

  try:
    if not options.isolate_server:
      return run(None)
    storage = isolateserver.get_storage(
        options.isolate_server, options.namespace)
    with storage:
      # Hashing schemes used by |storage| and |cache| MUST match.
      assert storage.hash_algo == cache.hash_algo
      return run(storage)
  except cipd.Error as ex:
    # Written with write() rather than the Python 2 only "print >>" statement.
    sys.stderr.write('%s\n' % ex.message)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000771
772
if __name__ == '__main__':
  # Script entry point: process-wide setup first, then hand over to main().
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -0500777 sys.exit(main(sys.argv[1:]))