blob: a879c221e39bdbcfb91a88c15837c2e9c90cd6b8 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
nodir55be77b2016-05-03 09:39:57 -07008Despite name "run_isolated", can run a generic non-isolated command specified as
9args.
10
11If input isolated hash is provided, fetches it, creates a tree of hard links,
12appends args to the command in the fetched isolated and runs it.
13To improve performance, keeps a local cache.
14The local cache can safely be deleted.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050015
nodirbe642ff2016-06-09 15:51:51 -070016Any ${EXECUTABLE_SUFFIX} on the command line will be replaced with ".exe" string
17on Windows and "" on other platforms.
18
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050019Any ${ISOLATED_OUTDIR} on the command line will be replaced by the location of a
20temporary directory upon execution of the command specified in the .isolated
21file. All content written to this directory will be uploaded upon termination
22and the .isolated file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070023
24Any ${SWARMING_BOT_FILE} on the command line will be replaced by the value of
25the --bot-file parameter. This file is used by a swarming bot to communicate
26state of the host to tasks. It is written to by the swarming bot's
27on_before_task() hook in the swarming server's custom bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028"""
29
maruel4409e302016-07-19 14:25:51 -070030__version__ = '0.8.4'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000031
maruel064c0a32016-04-05 11:47:15 -070032import base64
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000033import logging
34import optparse
35import os
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000036import sys
37import tempfile
maruel064c0a32016-04-05 11:47:15 -070038import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000040from third_party.depot_tools import fix_encoding
41
Vadim Shtayura6b555c12014-07-23 16:22:18 -070042from utils import file_path
maruel12e30012015-10-09 11:55:35 -070043from utils import fs
maruel064c0a32016-04-05 11:47:15 -070044from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040045from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040046from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050047from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000048from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000049from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000050
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080051import auth
nodirbe642ff2016-06-09 15:51:51 -070052import cipd
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000053import isolateserver
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000054
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000055
# Absolute path to this file (can be None if running from zip on Mac).
# __file__ is decoded with the filesystem encoding only when it is a byte
# string; a path that is already text is used as is.
THIS_FILE_PATH = os.path.abspath(
    __file__.decode(sys.getfilesystemencoding())
    if isinstance(__file__, bytes) else __file__) if __file__ else None

# Directory that contains this file (might be inside zip package).
# Derived from THIS_FILE_PATH so that a None __file__ yields None here instead
# of failing on None.decode().
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000063
# Directory that contains the currently running script file.
#
# zip_package.get_main_script_path() returns nothing when
# 'import run_isolated' is executed at the python interactive prompt (in that
# case __file__ is undefined); MAIN_DIR is then None.
MAIN_DIR = (
    os.path.dirname(os.path.abspath(zip_package.get_main_script_path()))
    if zip_package.get_main_script_path() else None)
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000072
maruele2f2cb82016-07-13 14:41:03 -070073
# Placeholders that may appear in the isolate task command line; they are
# substituted before the command is run.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# Name of the log file written by this script.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# Name of the log file used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Temporary directories get deliberately short names. Windows limits paths to
# a relatively short 260 characters (MAX_PATH), and it is easy to exceed that
# limit, for example with recursive dependency trees like npm packages.
#
# For the same reason, start the script with a `root_dir` that is as short as
# possible.
# - ir: isolated_run
# - io: isolated_out
# - it: isolated_tmp
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
101
102
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and everything it depends on.

  When |executable| is True, run_isolated.py is stored as __main__.py so that
  the resulting zip package can be executed directly by python.
  """
  # Creating a zip package while running from inside another zip package is
  # neither supported nor expected to be needed.
  this_module = sys.modules[__name__]
  assert not zip_package.is_zipped_module(this_module)
  assert THIS_FILE_PATH
  assert BASE_DIR

  archive = zip_package.ZipPackage(root=BASE_DIR)
  main_name = '__main__.py' if executable else None
  archive.add_python_file(THIS_FILE_PATH, main_name)
  for module_file in (
      'isolated_format.py', 'isolateserver.py', 'auth.py', 'cipd.py'):
    archive.add_python_file(os.path.join(BASE_DIR, module_file))
  for subdir in ('libs', 'third_party', 'utils'):
    archive.add_directory(os.path.join(BASE_DIR, subdir))
  return archive
124
125
def make_temp_dir(prefix, root_dir):
  """Creates a uniquely named directory under |root_dir|.

  The directory name starts with |prefix|. The path of the new directory is
  returned as a unicode string.
  """
  new_dir = tempfile.mkdtemp(dir=root_dir, prefix=prefix)
  return unicode(new_dir)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000129
130
def change_tree_read_only(rootdir, read_only):
  """Applies the |read_only| policy to the tree rooted at |rootdir|.

  |read_only| is one of 0 (or None), 1 or 2 and controls whether files can be
  modified and whether files can be created or deleted.

  Raises:
    ValueError if |read_only| is not a known flag.
  """
  if read_only == 2:
    # Both files and directories (except on Windows) become read only:
    # modifying, creating or deleting files in the test directory is
    # inhibited, except on Windows where creating and deleting files is still
    # possible.
    file_path.make_tree_read_only(rootdir)
    return

  if read_only == 1:
    # Only the files become read only, not the directories: files cannot be
    # modified but can still be created or deleted.
    file_path.make_tree_files_read_only(rootdir)
    return

  if read_only in (0, None):
    # Everything can be modified.
    # TODO(maruel): This is currently dangerous as long as DiskCache.touch()
    # is not yet changed to verify the hash of the content of the files it is
    # looking at, so that if a test modifies an input file, the file must be
    # deleted.
    file_path.make_tree_writeable(rootdir)
    return

  raise ValueError(
      'change_tree_read_only(%s, %s): Unknown flag %s' %
      (rootdir, read_only, read_only))
157
158
def process_command(command, out_dir, bot_file):
  """Expands the magic variables found in a command line.

  ${EXECUTABLE_SUFFIX} is replaced with cipd.EXECUTABLE_SUFFIX,
  ${ISOLATED_OUTDIR} with |out_dir| and ${SWARMING_BOT_FILE} with |bot_file|.
  When |bot_file| is not provided, ${SWARMING_BOT_FILE} is left untouched and
  a warning is logged.

  Returns:
    A new list with the processed arguments.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is requested in |command| but |out_dir|
    is not provided.
  """
  expanded = []
  for arg in command:
    arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
    has_path_parameter = False

    if ISOLATED_OUTDIR_PARAMETER in arg:
      if not out_dir:
        raise ValueError(
            'output directory is requested in command, but not provided; '
            'please specify one')
      arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
      has_path_parameter = True

    if SWARMING_BOT_FILE_PARAMETER in arg:
      if not bot_file:
        logging.warning('SWARMING_BOT_FILE_PARAMETER found in command, but no '
                        'bot_file specified. Leaving parameter unchanged.')
      else:
        arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
        has_path_parameter = True

    if has_path_parameter:
      # Slashes are converted only when a path parameter was substituted,
      # because of arguments like '${ISOLATED_OUTDIR}/foo/bar'.
      arg = arg.replace('/', os.sep)
    expanded.append(arg)
  return expanded
190
191
def run_command(command, cwd, tmp_dir, hard_timeout, grace_period):
  """Starts |command| as a detached child process and waits for it to finish.

  The child receives a copy of the current environment in which the
  temporary directory variable (TMPDIR on OSX, TEMP on Windows, TMP
  elsewhere) points to |tmp_dir|.

  When |hard_timeout| expires, or one of subprocess42.STOP_SIGNALS is received
  while waiting, the child is terminated and then given |grace_period| to exit
  before being killed. A falsy timeout is passed to proc.wait() as None.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)' % (command, cwd))

  if sys.platform == 'darwin':
    tmp_var = 'TMPDIR'
  elif sys.platform == 'win32':
    tmp_var = 'TEMP'
  else:
    tmp_var = 'TMP'
  child_env = os.environ.copy()
  child_env[tmp_var] = tmp_dir.encode(sys.getfilesystemencoding())

  returncode = None
  timed_out = False
  with tools.Profiler('RunTest'):
    child = None
    # Becomes non-empty once a stop signal has been handled.
    signalled = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def on_stop_signal(signum, _frame):
        # Only the first signal received after the child was started is
        # converted into a TimeoutExpired.
        if not child or signalled:
          return
        logging.info('Received signal %d', signum)
        signalled.append(True)
        raise subprocess42.TimeoutExpired(command, None)

      child = subprocess42.Popen(
          command, cwd=cwd, env=child_env, detached=True)
      with subprocess42.set_signal_handler(
          subprocess42.STOP_SIGNALS, on_stop_signal):
        try:
          returncode = child.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          if not signalled:
            logging.warning('Hard timeout')
            timed_out = True
          logging.warning('Sending SIGTERM')
          child.terminate()

      # Signals are ignored during the grace period, so the child process is
      # forcibly given the whole grace period.
      if returncode is None:
        def swallow(*_):
          return None

        with subprocess42.set_signal_handler(
            subprocess42.STOP_SIGNALS, swallow):
          try:
            returncode = child.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - process exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            child.kill()
        logging.info('Waiting for proces exit')
        returncode = child.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist or a dependent library is missing>\n'
          '<Check for missing .so/.dll in the .isolate or GN file>\n'
          '<Command: %s>\n' % command)
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      returncode = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      returncode, hex(0xffffffff & returncode))
  return returncode, timed_out
maruela9cfd6f2015-09-15 11:03:15 -0700268
269
def fetch_and_map(isolated_hash, storage, cache, outdir, use_symlinks):
  """Fetches an isolated tree and maps it into |outdir|.

  Returns:
    tuple(bundle, stats)
    - bundle: the value returned by isolateserver.fetch_isolated().
    - stats: dict with the fetch duration, the initial cache item count and
      size, and the base64 encoded large.pack() of the sorted cold (added to
      the cache) and hot (used but not added) items.
  """
  began = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=use_symlinks)
  # Measured before the item lists are packed, so packing is not counted.
  elapsed = time.time() - began

  cold_items = sorted(cache.added)
  hot_items = sorted(set(cache.used) - set(cache.added))
  stats = {
    'duration': elapsed,
    'initial_number_items': cache.initial_number_items,
    'initial_size': cache.initial_size,
    'items_cold': base64.b64encode(large.pack(cold_items)),
    'items_hot': base64.b64encode(large.pack(hot_items)),
  }
  return bundle, stats
287
288
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Archives the content of |out_dir| to |storage|, then deletes |out_dir|.

  The upload only happens when |out_dir| exists and is not empty. The
  directory is kept when |leak_temp_dir| is set.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the
      isolated server, or None when nothing was archived.
    - success: False if something occurred that means that the task must
      forcibly be considered a failure, e.g. zombie processes were left
      behind and |out_dir| could not be removed.
    - stats: uploading stats.

  Raises:
    isolateserver.Aborted if the upload was aborted.
  """
  outputs_ref = None
  cold_sizes = []
  hot_sizes = []
  began = time.time()

  # A .isolated file is generated out of out_dir only if files were written
  # in it.
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold_sizes = sorted([item.size for item in f_cold])
        hot_sizes = sorted([item.size for item in f_hot])
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading
        # data. There are 2 possible causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    # Nothing to delete, or the deletion worked: the run is considered fine.
    if leak_temp_dir or not fs.isdir(out_dir) or file_path.rmtree(out_dir):
      success = True
    else:
      logging.error('Had difficulties removing out_dir %s', out_dir)
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)

  stats = {
    'duration': time.time() - began,
    'items_cold': base64.b64encode(large.pack(cold_sizes)),
    'items_hot': base64.b64encode(large.pack(hot_sizes)),
  }
  return outputs_ref, success, stats
maruela9cfd6f2015-09-15 11:03:15 -0700348
349
def map_and_run(
    command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
    hard_timeout, grace_period, bot_file, extra_args, install_packages_fn,
    use_symlinks):
  """Runs a command with optional isolated input/output.

  Creates the run, output and temporary directories, installs packages via
  |install_packages_fn|, maps the isolated tree when |isolated_hash| is set,
  runs the command and finally cleans the directories up and uploads the
  outputs.

  See run_tha_test for argument documentation.

  Returns:
    dict with metadata about the result: 'duration', 'exit_code',
    'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref' and
    'version'.
  """
  assert bool(command) ^ bool(isolated_hash)
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
      # 'isolated': {
      #   'cipd': {
      #     'duration': 0.,
      #     'get_client_duration': 0.,
      #   },
      #   'download': {
      #     'duration': 0.,
      #     'initial_number_items': 0,
      #     'initial_size': 0,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      #   'upload': {
      #     'duration': 0.,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      # },
    },
    'outputs_ref': None,
    'version': 5,
  }

  if root_dir:
    file_path.ensure_tree(root_dir, 0o700)
  else:
    root_dir = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
  # See comment for these constants.
  run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
  cwd = run_dir

  try:
    cipd_stats = install_packages_fn(run_dir)
    if cipd_stats:
      result['stats']['cipd'] = cipd_stats

    if isolated_hash:
      isolated_stats = result['stats'].setdefault('isolated', {})
      bundle, isolated_stats['download'] = fetch_and_map(
          isolated_hash=isolated_hash,
          storage=storage,
          cache=cache,
          outdir=run_dir,
          use_symlinks=use_symlinks)
      if not bundle.command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<The .isolated doesn\'t declare any command to run!>\n'
            '<Check your .isolate for missing \'command\' variable>\n')
        if os.environ.get('SWARMING_TASK_ID'):
          # Give an additional hint when running as a swarming task.
          sys.stderr.write('<This occurs at the \'isolate\' step>\n')
        result['exit_code'] = 1
        return result

      change_tree_read_only(run_dir, bundle.read_only)
      cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
      command = bundle.command + extra_args

    command = tools.fix_python_path(command)
    command = process_command(command, out_dir, bot_file)
    file_path.ensure_command_has_abs_path(command, cwd)

    sys.stdout.flush()
    start = time.time()
    try:
      result['exit_code'], result['had_hard_timeout'] = run_command(
          command, cwd, tmp_dir, hard_timeout, grace_period)
    finally:
      result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    try:
      # Nothing has failed yet. Without this default, |success| is unbound
      # below when the directories are leaked (or are already gone) and there
      # is no out_dir, and the resulting UnboundLocalError would be reported
      # as a bogus internal failure.
      success = True
      if leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows rmtree(run_dir) call below has a synchronization effect: it
        # finishes only when all task child processes terminate (since a running
        # process locks *.exe file). Examine out_dir only after that call
        # completes (since child processes may write to out_dir too and we need
        # to wait for them to finish).
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(
                'Failed to delete the run directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.\n')
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(
                'Failed to delete the temporary directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.\n')
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(storage, out_dir, leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500500
501
def run_tha_test(
    command, isolated_hash, storage, cache, leak_temp_dir, result_json,
    root_dir, hard_timeout, grace_period, bot_file, extra_args,
    install_packages_fn, use_symlinks):
  """Runs an executable and records execution metadata.

  Exactly one of command and isolated_hash must be specified.

  With isolated_hash, the dependencies are downloaded in the cache, hardlinked
  into a temporary directory and the command specified in the .isolated is
  run.

  The output files are held in a temporary directory, whose content is
  uploaded back to |storage| packaged as a .isolated file.

  Arguments:
    command: the command to run, a list of strings. Mutually exclusive with
             isolated_hash.
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
                   The command specified in the .isolated is executed.
                   Mutually exclusive with command argument.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.
    root_dir: path to the directory to use to create the temporary directory. If
              not specified, a random temporary directory is created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    bot_file: value substituted for ${SWARMING_BOT_FILE} in the command line.
    extra_args: optional arguments to add to the command stated in the .isolate
                file. Ignored if isolated_hash is empty.
    install_packages_fn: function (dir) => cipd_stats. Installs packages.
    use_symlinks: create tree with symlinks instead of hardlinks.

  Returns:
    Process exit code that should be used.
  """
  assert bool(command) ^ bool(isolated_hash)
  if not extra_args:
    extra_args = []

  checked_args = command or extra_args
  if any(ISOLATED_OUTDIR_PARAMETER in a for a in checked_args):
    assert storage is not None, 'storage is None although outdir is specified'

  if result_json:
    # A placeholder json file is written immediately, in case this process
    # gets killed before the real result is available.
    placeholder = {
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'Was terminated before completion',
      'outputs_ref': None,
      'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  # The exit code of run_isolated depends on whether result_json is used.
  result = map_and_run(
      command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
      hard_timeout, grace_period, bot_file, extra_args, install_packages_fn,
      use_symlinks)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  had_internal_failure = int(bool(result['internal_failure']))

  if result_json:
    # We've found tests to delete 'work' when quitting, causing an exception
    # here. Try to recreate the directory if necessary.
    file_path.ensure_tree(os.path.dirname(result_json))
    tools.write_json(result_json, result, dense=True)
    # Only return 1 if there was an internal error.
    return had_internal_failure

  # Marshall into old-style inline output.
  outputs_ref = result['outputs_ref']
  if outputs_ref:
    data = {
      'hash': outputs_ref['isolated'],
      'namespace': outputs_ref['namespace'],
      'storage': outputs_ref['isolatedserver'],
    }
    sys.stdout.flush()
    print(
        '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
        tools.format_json(data, dense=True))
    sys.stdout.flush()
  return result['exit_code'] or had_internal_failure
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000593
594
def install_packages(
    run_dir, packages, service_url, client_package_name,
    client_version, cache_dir=None, timeout=None):
  """Installs CIPD packages under run_dir. Returns stats.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, dict {path: [(package_name, version)]}.
        Each path is joined onto run_dir to form the site root the packages
        are installed into.
    service_url (str): CIPD server url, e.g.
        "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
        Must be set even though it defaults to None.
    timeout: max duration in seconds that this function can take.

  Returns:
    None when there is nothing to install, otherwise a dict with keys
    'duration' (whole call) and 'get_client_duration' (obtaining the CIPD
    client only), both in seconds.

  Raises:
    cipd.Error: if a package path resolves to a location outside of run_dir.
  """
  assert cache_dir
  if not packages:
    return None

  timeoutfn = tools.sliding_timeout(timeout)
  start = time.time()
  cache_dir = os.path.abspath(cache_dir)

  run_dir = os.path.abspath(run_dir)
  # Joining with '' yields run_dir with exactly one trailing separator (and
  # leaves a filesystem root untouched). Comparing against this prefix instead
  # of the bare run_dir string prevents a sibling such as "<run_dir>_evil"
  # (reachable through "../<name>_evil") from passing the containment check.
  run_dir_prefix = os.path.join(run_dir, '')

  get_client_start = time.time()
  client_manager = cipd.get_client(
      service_url, client_package_name, client_version, cache_dir,
      timeout=timeoutfn())
  with client_manager as client:
    get_client_duration = time.time() - get_client_start
    # Use a distinct loop name so the |packages| argument is not shadowed.
    for path, path_packages in sorted(packages.items()):
      site_root = os.path.abspath(os.path.join(run_dir, path))
      if site_root != run_dir and not site_root.startswith(run_dir_prefix):
        raise cipd.Error('Invalid CIPD package path "%s"' % path)

      # Do not clean site_root before installation because it may contain other
      # site roots.
      file_path.ensure_tree(site_root, 0o770)
      client.ensure(
          site_root, path_packages,
          cache_dir=os.path.join(cache_dir, 'cipd_internal'),
          timeout=timeoutfn())
      file_path.make_tree_files_read_only(site_root)

  total_duration = time.time() - start
  logging.info(
      'Installing CIPD client and packages took %d seconds', total_duration)

  return {
    'duration': total_duration,
    'get_client_duration': get_client_duration,
  }
nodirbe642ff2016-06-09 15:51:51 -0700648
649
def create_option_parser():
  """Builds the command line parser for run_isolated.

  Options are registered in this order: general options, the 'Data source'
  group, cache options, CIPD options, the 'Debugging' group and finally auth
  options. The defaults for 'cache' and 'cipd_cache' are set last.

  Returns:
    The configured logging_utils.OptionParserWithLogging instance.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)

  # (flag, keyword arguments) pairs, registered in the listed order.
  general_options = (
      ('--clean', {
          'action': 'store_true',
          'help':
              'Cleans the cache, trimming it necessary and remove corrupted '
              'items and returns without executing anything; use with -v to '
              'know what was done',
      }),
      ('--no-clean', {
          'action': 'store_true',
          'help':
              'Do not clean the cache automatically on startup. This is meant '
              'for bots where a separate execution with --clean was done '
              'earlier so doing it again is redundant',
      }),
      ('--use-symlinks', {
          'action': 'store_true',
          'help': 'Use symlinks instead of hardlinks',
      }),
      ('--json', {
          'help':
              'dump output metadata to json file. When used, run_isolated '
              'returns non-zero only on internal failure',
      }),
      ('--hard-timeout', {
          'type': 'float',
          'help': 'Enforce hard timeout in execution',
      }),
      ('--grace-period', {
          'type': 'float',
          'help': 'Grace period between SIGTERM and SIGKILL',
      }),
      ('--bot-file', {
          'help':
              'Path to a file describing the state of the host. The content '
              'is defined by on_before_task() in bot_config.',
      }),
  )
  for flag, settings in general_options:
    parser.add_option(flag, **settings)

  # Where the .isolated comes from.
  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(source_group)
  parser.add_option_group(source_group)

  isolateserver.add_cache_options(parser)
  cipd.add_cipd_options(parser)

  # Knobs meant for troubleshooting a run.
  troubleshooting_group = optparse.OptionGroup(parser, 'Debugging')
  troubleshooting_group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
           'Default: %default')
  troubleshooting_group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(troubleshooting_group)

  auth.add_auth_options(parser)

  parser.set_defaults(cache='cache', cipd_cache='cipd_cache')
  return parser
706
707
def main(args):
  """Parses the command line and runs the requested command.

  Args:
    args: command line arguments, without the program name.

  Returns:
    0 after a --clean only run, 1 when a cipd.Error is raised while running,
    otherwise whatever run_tha_test() returns.
  """
  parser = create_option_parser()
  options, args = parser.parse_args(args)

  cache = isolateserver.process_cache_options(options)
  if options.clean:
    # --clean is a standalone mode: reject the options it cannot be combined
    # with, clean up and stop.
    incompatible = (
        (options.isolated, 'Can\'t use --isolated with --clean.'),
        (options.isolate_server, 'Can\'t use --isolate-server with --clean.'),
        (options.json, 'Can\'t use --json with --clean.'),
    )
    for value, message in incompatible:
      if value:
        parser.error(message)
    cache.cleanup()
    return 0
  if not options.no_clean:
    cache.cleanup()

  if not (options.isolated or args):
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  # Normalize user supplied paths to absolute unicode paths.
  if options.root_dir:
    options.root_dir = unicode(os.path.abspath(options.root_dir))
  if options.json:
    options.json = unicode(os.path.abspath(options.json))

  cipd.validate_cipd_options(parser, options)

  def install_packages_fn(run_dir):
    # Deferred so packages are installed into the run directory supplied by
    # the caller of this callback.
    return install_packages(
        run_dir, cipd.parse_package_args(options.cipd_packages),
        options.cipd_server, options.cipd_client_package,
        options.cipd_client_version, cache_dir=options.cipd_cache)

  def run_with(storage, command):
    # Single call site for run_tha_test(); only |storage| differs between the
    # isolate server and the server-less cases.
    return run_tha_test(
        command, options.isolated, storage, cache, options.leak_temp_dir,
        options.json, options.root_dir, options.hard_timeout,
        options.grace_period, options.bot_file, args, install_packages_fn,
        options.use_symlinks)

  try:
    # With --isolated, args are passed only as extra arguments (the |args|
    # given to run_tha_test() above), not as the command itself.
    command = [] if options.isolated else args
    if not options.isolate_server:
      return run_with(None, command)
    storage = isolateserver.get_storage(
        options.isolate_server, options.namespace)
    with storage:
      # Hashing schemes used by |storage| and |cache| MUST match.
      assert storage.hash_algo == cache.hash_algo
      return run_with(storage, command)
  except cipd.Error as ex:
    print >> sys.stderr, ex.message
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000772
773
if __name__ == '__main__':
  # Process-wide setup that has to happen before main() runs.
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  file_path.enable_symlink()
  # The value returned by main() becomes the process exit code.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -0500779 sys.exit(main(sys.argv[1:]))