blob: 9bdcc2b597b53959d267b6724725f83daa258901 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
nodir55be77b2016-05-03 09:39:57 -07008Despite name "run_isolated", can run a generic non-isolated command specified as
9args.
10
11If input isolated hash is provided, fetches it, creates a tree of hard links,
12appends args to the command in the fetched isolated and runs it.
13To improve performance, keeps a local cache.
14The local cache can safely be deleted.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050015
nodirbe642ff2016-06-09 15:51:51 -070016Any ${EXECUTABLE_SUFFIX} on the command line will be replaced with ".exe" string
17on Windows and "" on other platforms.
18
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050019Any ${ISOLATED_OUTDIR} on the command line will be replaced by the location of a
20temporary directory upon execution of the command specified in the .isolated
21file. All content written to this directory will be uploaded upon termination
22and the .isolated file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070023
24Any ${SWARMING_BOT_FILE} on the command line will be replaced by the value of
25the --bot-file parameter. This file is used by a swarming bot to communicate
26state of the host to tasks. It is written to by the swarming bot's
27on_before_task() hook in the swarming server's custom bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028"""
29
maruel4409e302016-07-19 14:25:51 -070030__version__ = '0.8.4'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000031
maruel064c0a32016-04-05 11:47:15 -070032import base64
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000033import logging
34import optparse
35import os
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000036import sys
37import tempfile
maruel064c0a32016-04-05 11:47:15 -070038import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000039
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000040from third_party.depot_tools import fix_encoding
41
Vadim Shtayura6b555c12014-07-23 16:22:18 -070042from utils import file_path
maruel12e30012015-10-09 11:55:35 -070043from utils import fs
maruel064c0a32016-04-05 11:47:15 -070044from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040045from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040046from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050047from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000048from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000049from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000050
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080051import auth
nodirbe642ff2016-06-09 15:51:51 -070052import cipd
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000053import isolateserver
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000054
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000055
# Absolute path to this file (can be None if running from zip on Mac).
THIS_FILE_PATH = os.path.abspath(
    __file__.decode(sys.getfilesystemencoding())) if __file__ else None

# Directory that contains this file (might be inside zip package). It is
# derived from THIS_FILE_PATH, which already handles an empty or None __file__,
# so that __file__ is never dereferenced a second time.
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None

# Directory that contains currently running script file.
if zip_package.get_main_script_path():
  MAIN_DIR = os.path.dirname(
      os.path.abspath(zip_package.get_main_script_path()))
else:
  # This happens when 'import run_isolated' is executed at the python
  # interactive prompt, in that case __file__ is undefined.
  MAIN_DIR = None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000072
maruele2f2cb82016-07-13 14:41:03 -070073
# Magic variables that can be found in the isolate task command line.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Use short names for temporary directories. This is driven by Windows, which
# imposes a relatively short maximum path length of 260 characters, often
# referred to as MAX_PATH. It is relatively easy to create files with longer
# path length. A use case is with recursive dependency trees like npm packages.
#
# It is recommended to start the script with a `root_dir` as short as
# possible.
# - ir stands for isolated_run
# - io stands for isolated_out
# - it stands for isolated_tmp
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
101
102
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and all of its dependencies.

  When |executable| is True, run_isolated.py is stored as __main__.py so that
  the zip package is directly executable by python.
  """
  # Creating a zip package while running from another zip package is
  # unsupported and probably unneeded.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  archive_name = '__main__.py' if executable else None
  bundle = zip_package.ZipPackage(root=BASE_DIR)
  bundle.add_python_file(THIS_FILE_PATH, archive_name)
  sibling_modules = (
      'isolated_format.py', 'isolateserver.py', 'auth.py', 'cipd.py')
  for module_name in sibling_modules:
    bundle.add_python_file(os.path.join(BASE_DIR, module_name))
  for subdir in ('third_party', 'utils'):
    bundle.add_directory(os.path.join(BASE_DIR, subdir))
  return bundle
123
124
def make_temp_dir(prefix, root_dir):
  """Returns the path of a new unique temporary directory as a text string.

  Args:
    prefix: name prefix handed to tempfile.mkdtemp().
    root_dir: directory to create the new directory in; None lets
        tempfile.mkdtemp() pick its default location.
  """
  path = tempfile.mkdtemp(prefix=prefix, dir=root_dir)
  if isinstance(path, bytes):
    # mkdtemp() hands back a byte string when it was given byte strings. Decode
    # it with the file system encoding rather than the implicit ASCII codec,
    # which raises UnicodeDecodeError on non-ASCII paths.
    path = path.decode(sys.getfilesystemencoding())
  return path
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000128
129
def change_tree_read_only(rootdir, read_only):
  """Applies the requested read-only policy to the tree rooted at |rootdir|.

  |read_only| can be 0 (or None), 1 or 2; it controls whether files can be
  modified and whether files can be created or deleted.

  Raises:
    ValueError if |read_only| is none of the values above.
  """
  if read_only == 2:
    # Both files and directories (except on Windows) become read only. This
    # prevents modifying, creating or deleting files in the test directory,
    # except on Windows where creating and deleting files is still possible.
    file_path.make_tree_read_only(rootdir)
    return

  if read_only == 1:
    # Only files become read only, directories are left alone. Files cannot be
    # modified but creating or deleting files is still possible.
    file_path.make_tree_files_read_only(rootdir)
    return

  if read_only in (0, None):
    # Anything can be modified.
    # TODO(maruel): This is currently dangerous as long as DiskCache.touch()
    # is not yet changed to verify the hash of the content of the files it is
    # looking at, so that if a test modifies an input file, the file must be
    # deleted.
    file_path.make_tree_writeable(rootdir)
    return

  raise ValueError(
      'change_tree_read_only(%s, %s): Unknown flag %s' %
      (rootdir, read_only, read_only))
156
157
def process_command(command, out_dir, bot_file):
  """Expands the magic variables found in a command line.

  Returns a new list with one processed entry per entry of |command|.

  Raises:
    ValueError if a parameter is requested in |command| but its value is not
    provided.
  """
  processed = []
  for arg in command:
    arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
    path_inserted = False

    if ISOLATED_OUTDIR_PARAMETER in arg:
      if not out_dir:
        raise ValueError(
            'output directory is requested in command, but not provided; '
            'please specify one')
      arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
      path_inserted = True

    if SWARMING_BOT_FILE_PARAMETER in arg:
      if not bot_file:
        logging.warning('SWARMING_BOT_FILE_PARAMETER found in command, but no '
                        'bot_file specified. Leaving parameter unchanged.')
      else:
        arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
        path_inserted = True

    if path_inserted:
      # Slashes are converted only when a path parameter was substituted,
      # because of arguments like '${ISOLATED_OUTDIR}/foo/bar'.
      arg = arg.replace('/', os.sep)
    processed.append(arg)
  return processed
189
190
def run_command(command, cwd, tmp_dir, hard_timeout, grace_period):
  """Runs the command and waits for it to exit.

  Args:
    command: argv list handed to subprocess42.Popen().
    cwd: working directory of the child process.
    tmp_dir: directory exported to the child process through the TMPDIR (OSX),
        TEMP (Windows) or TMP (other platforms) environment variable.
    hard_timeout: seconds to wait before terminating the process; a falsy
        value waits without limit.
    grace_period: seconds to wait after termination was requested before
        killing the process; a falsy value waits without limit.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)', command, cwd)

  env = os.environ.copy()
  if sys.platform == 'darwin':
    tmp_var = 'TMPDIR'
  elif sys.platform == 'win32':
    tmp_var = 'TEMP'
  else:
    tmp_var = 'TMP'
  env[tmp_var] = tmp_dir.encode(sys.getfilesystemencoding())

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list so that handler() can record the signal without rebinding a name
    # of the enclosing scope.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def handler(signum, _frame):
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          # Reuse the timeout code path below to interrupt proc.wait().
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
      logging.info('Waiting for proces exit')
      exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist or a dependent library is missing>\n'
          '<Check for missing .so/.dll in the .isolate or GN file>\n'
          '<Command: %s>\n' % (command,))
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700267
268
def fetch_and_map(isolated_hash, storage, cache, outdir, use_symlinks):
  """Fetches an isolated tree into |outdir| and returns (bundle, stats)."""
  begin = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=use_symlinks)
  stats = {}
  stats['duration'] = time.time() - begin
  stats['initial_number_items'] = cache.initial_number_items
  stats['initial_size'] = cache.initial_size
  # Cold items were added to the cache by this fetch; hot items were used but
  # not added.
  stats['items_cold'] = base64.b64encode(large.pack(sorted(cache.added)))
  hot_items = set(cache.used).difference(cache.added)
  stats['items_hot'] = base64.b64encode(large.pack(sorted(hot_items)))
  return bundle, stats
286
287
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Uploads the content of |out_dir|, then deletes the directory.

  The directory is kept when |leak_temp_dir| is set.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
        server, if applicable.
    - success: False if something occurred that means that the task must
        forcibly be considered a failure, e.g. zombie processes were left
        behind.
    - stats: uploading stats.
  """
  archived_ref = None
  cold_sizes = []
  hot_sizes = []
  begin = time.time()

  # out_dir is archived, and a .isolated file generated out of it, only when
  # files were written in the directory.
  has_outputs = fs.isdir(out_dir) and fs.listdir(out_dir)
  if has_outputs:
    with tools.Profiler('ArchiveOutput'):
      try:
        archived = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        results, cold_items, hot_items = archived
        archived_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold_sizes = sorted(item.size for item in cold_items)
        hot_sizes = sorted(item.size for item in hot_items)
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading data.
        # There is 2 causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  removed = False
  try:
    # Nothing to delete counts as a success: either the directory is leaked on
    # purpose or it does not exist.
    if leak_temp_dir or not fs.isdir(out_dir) or file_path.rmtree(out_dir):
      removed = True
    else:
      logging.error('Had difficulties removing out_dir %s', out_dir)
  except OSError as err:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, err)

  upload_stats = {
    'duration': time.time() - begin,
    'items_cold': base64.b64encode(large.pack(cold_sizes)),
    'items_hot': base64.b64encode(large.pack(hot_sizes)),
  }
  return archived_ref, removed, upload_stats
maruela9cfd6f2015-09-15 11:03:15 -0700347
348
def _delete_dir_or_fail(path, label, result):
  """Deletes directory |path|; on failure turns a 0 exit code in |result| to 1.

  Does nothing when |path| is not a directory. |label| names the directory in
  the message written to stderr.
  """
  if not fs.isdir(path):
    return
  try:
    success = file_path.rmtree(path)
  except OSError as e:
    logging.error('Failure with %s', e)
    success = False
  if not success:
    sys.stderr.write(
        'Failed to delete the %s directory, forcibly failing\n'
        'the task because of it. No zombie process can outlive a\n'
        'successful task run and still be marked as successful.\n'
        'Fix your stuff.\n' % label)
    if result['exit_code'] == 0:
      result['exit_code'] = 1


def map_and_run(
    command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
    hard_timeout, grace_period, bot_file, extra_args, install_packages_fn,
    use_symlinks):
  """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  Exactly one of |command| and |isolated_hash| must be set.

  Returns metadata about the result: a dict with the keys 'duration',
  'exit_code', 'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref'
  and 'version'.
  """
  assert bool(command) ^ bool(isolated_hash)
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
      # Filled in below when applicable:
      # 'cipd': {
      #   'duration': 0.,
      #   'get_client_duration': 0.,
      # },
      # 'isolated': {
      #   'download': {
      #     'duration': 0.,
      #     'initial_number_items': 0,
      #     'initial_size': 0,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      #   'upload': {
      #     'duration': 0.,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      # },
    },
    'outputs_ref': None,
    'version': 5,
  }

  if root_dir:
    file_path.ensure_tree(root_dir, 0o700)
  else:
    root_dir = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
  # See comment for these constants.
  run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
  cwd = run_dir

  try:
    cipd_stats = install_packages_fn(run_dir)
    if cipd_stats:
      result['stats']['cipd'] = cipd_stats

    if isolated_hash:
      isolated_stats = result['stats'].setdefault('isolated', {})
      bundle, isolated_stats['download'] = fetch_and_map(
          isolated_hash=isolated_hash,
          storage=storage,
          cache=cache,
          outdir=run_dir,
          use_symlinks=use_symlinks)
      if not bundle.command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<The .isolated doesn\'t declare any command to run!>\n'
            '<Check your .isolate for missing \'command\' variable>\n')
        if os.environ.get('SWARMING_TASK_ID'):
          # Give an additional hint when running as a swarming task.
          sys.stderr.write('<This occurs at the \'isolate\' step>\n')
        result['exit_code'] = 1
        # The finally clause below still cleans up the directories.
        return result

      change_tree_read_only(run_dir, bundle.read_only)
      cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
      command = bundle.command + extra_args

    command = tools.fix_python_path(command)
    command = process_command(command, out_dir, bot_file)
    file_path.ensure_command_has_abs_path(command, cwd)

    sys.stdout.flush()
    start = time.time()
    try:
      result['exit_code'], result['had_hard_timeout'] = run_command(
          command, cwd, tmp_dir, hard_timeout, grace_period)
    finally:
      result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    try:
      if leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows deleting run_dir has a synchronization effect: it
        # finishes only when all task child processes terminate (since a running
        # process locks *.exe file). Examine out_dir only after that call
        # completes (since child processes may write to out_dir too and we need
        # to wait for them to finish).
        _delete_dir_or_fail(run_dir, 'run', result)
        _delete_dir_or_fail(tmp_dir, 'temporary', result)

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(storage, out_dir, leak_temp_dir))
        # Only meaningful when delete_and_upload() ran; checking it here keeps
        # |success| from being read when there is no out_dir.
        if not success and result['exit_code'] == 0:
          result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500499
500
def run_tha_test(
    command, isolated_hash, storage, cache, leak_temp_dir, result_json,
    root_dir, hard_timeout, grace_period, bot_file, extra_args,
    install_packages_fn, use_symlinks):
  """Runs an executable and records metadata about the execution.

  Exactly one of command and isolated_hash must be specified.

  When isolated_hash is specified, the dependencies are downloaded in the
  cache, mapped into a temporary directory and the command specified in the
  .isolated is run.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    command: the command to run, a list of strings. Mutually exclusive with
             isolated_hash.
    isolated_hash: the hash of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
                   The command specified in the .isolated is executed.
                   Mutually exclusive with command argument.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.
    root_dir: path to the directory to use to create the temporary directories.
              If not specified, map_and_run() picks one next to the cache
              directory when the cache has one.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    bot_file: value substituted for ${SWARMING_BOT_FILE} in the command line.
    extra_args: optional arguments to add to the command stated in the .isolate
                file. Ignored if isolate_hash is empty.
    install_packages_fn: function (dir) => cipd_stats. Installs packages.
    use_symlinks: create tree with symlinks instead of hardlinks.

  Returns:
    Process exit code that should be used.
  """
  assert bool(command) ^ bool(isolated_hash)
  extra_args = extra_args or []

  # An output directory can only be provided when there is a storage to upload
  # it to.
  for arg in (command or extra_args):
    if ISOLATED_OUTDIR_PARAMETER in arg:
      assert storage is not None, 'storage is None although outdir is specified'
      break

  if result_json:
    # Write a json output file right away in case we get killed.
    placeholder = {
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'Was terminated before completion',
      'outputs_ref': None,
      'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  # run_isolated exit code. Depends on if result_json is used or not.
  result = map_and_run(
      command, isolated_hash, storage, cache, leak_temp_dir, root_dir,
      hard_timeout, grace_period, bot_file, extra_args, install_packages_fn,
      use_symlinks)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  if not result_json:
    # Marshall into old-style inline output.
    outputs_ref = result['outputs_ref']
    if outputs_ref:
      data = {
        'hash': outputs_ref['isolated'],
        'namespace': outputs_ref['namespace'],
        'storage': outputs_ref['isolatedserver'],
      }
      sys.stdout.flush()
      print(
          '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
          tools.format_json(data, dense=True))
      sys.stdout.flush()
    return result['exit_code'] or int(bool(result['internal_failure']))

  # We've found tests to delete 'work' when quitting, causing an exception
  # here. Try to recreate the directory if necessary.
  file_path.ensure_tree(os.path.dirname(result_json))
  tools.write_json(result_json, result, dense=True)
  # Only return 1 if there was an internal error.
  return int(bool(result['internal_failure']))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000592
593
def install_packages(
    run_dir, packages, service_url, client_package_name,
    client_version, cache_dir=None, timeout=None):
  """Installs CIPD packages under run_dir. Returns stats.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, dict {path: [(package_name, version)]}.
        Each path is joined onto run_dir and must resolve to run_dir itself
        or to a location below it.
    service_url (str): CIPD server url, e.g.
        "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
        Must be set even though the signature defaults it to None.
    timeout: max duration in seconds that this function can take.

  Returns:
    None when there is nothing to install, otherwise a dict with the keys
    'duration' (whole call) and 'get_client_duration' (obtaining the CIPD
    client only), both in seconds.

  Raises:
    cipd.Error: if a package path resolves outside of run_dir.
  """
  assert cache_dir
  if not packages:
    return None

  # NOTE(review): presumably each timeoutfn() call yields the time still
  # available out of |timeout| -- confirm against tools.sliding_timeout.
  timeoutfn = tools.sliding_timeout(timeout)
  start = time.time()
  cache_dir = os.path.abspath(cache_dir)

  run_dir = os.path.abspath(run_dir)
  # A bare startswith(run_dir) would also accept sibling directories such as
  # "<run_dir>_evil". Comparing against run_dir with a trailing separator
  # restricts the match to whole path components. os.path.join() only appends
  # the separator when it is missing, so a filesystem root works too.
  run_dir_prefix = os.path.join(run_dir, '')

  get_client_start = time.time()
  client_manager = cipd.get_client(
      service_url, client_package_name, client_version, cache_dir,
      timeout=timeoutfn())
  with client_manager as client:
    get_client_duration = time.time() - get_client_start
    # Use a dedicated loop variable instead of rebinding the |packages|
    # argument while iterating over it.
    for path, path_packages in sorted(packages.iteritems()):
      site_root = os.path.abspath(os.path.join(run_dir, path))
      if site_root != run_dir and not site_root.startswith(run_dir_prefix):
        raise cipd.Error('Invalid CIPD package path "%s"' % path)

      # Do not clean site_root before installation because it may contain other
      # site roots.
      file_path.ensure_tree(site_root, 0o770)
      client.ensure(
          site_root, path_packages,
          cache_dir=os.path.join(cache_dir, 'cipd_internal'),
          timeout=timeoutfn())
      file_path.make_tree_files_read_only(site_root)

  total_duration = time.time() - start
  logging.info(
      'Installing CIPD client and packages took %d seconds', total_duration)

  return {
    'duration': total_duration,
    'get_client_duration': get_client_duration,
  }
nodirbe642ff2016-06-09 15:51:51 -0700647
648
def create_option_parser():
  """Builds the command line parser used by main().

  Registers run_isolated's own flags, then lets the isolateserver, cipd and
  auth modules add theirs, and finally sets the default values for the
  'cache' and 'cipd_cache' options.

  Returns:
    The configured logging_utils.OptionParserWithLogging instance.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)

  # Flags attached directly to the parser, listed in registration order.
  top_level_options = (
      (('--clean',), {
          'action': 'store_true',
          'help':
              'Cleans the cache, trimming it necessary and remove corrupted '
              'items and returns without executing anything; use with -v to '
              'know what was done',
      }),
      (('--no-clean',), {
          'action': 'store_true',
          'help':
              'Do not clean the cache automatically on startup. This is meant '
              'for bots where a separate execution with --clean was done '
              'earlier so doing it again is redundant',
      }),
      (('--use-symlinks',), {
          'action': 'store_true',
          'help': 'Use symlinks instead of hardlinks',
      }),
      (('--json',), {
          'help':
              'dump output metadata to json file. When used, run_isolated '
              'returns non-zero only on internal failure',
      }),
      (('--hard-timeout',), {
          'type': 'float',
          'help': 'Enforce hard timeout in execution',
      }),
      (('--grace-period',), {
          'type': 'float',
          'help': 'Grace period between SIGTERM and SIGKILL',
      }),
      (('--bot-file',), {
          'help':
              'Path to a file describing the state of the host. The content '
              'is defined by on_before_task() in bot_config.',
      }),
  )
  for flags, settings in top_level_options:
    parser.add_option(*flags, **settings)

  # Where the inputs come from.
  source_group = optparse.OptionGroup(parser, 'Data source')
  source_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(source_group)
  parser.add_option_group(source_group)

  isolateserver.add_cache_options(parser)
  cipd.add_cipd_options(parser)

  # Knobs that only matter when investigating a run.
  troubleshooting_group = optparse.OptionGroup(parser, 'Debugging')
  troubleshooting_group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
           'Default: %default')
  troubleshooting_group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(troubleshooting_group)

  auth.add_auth_options(parser)

  parser.set_defaults(cache='cache', cipd_cache='cipd_cache')
  return parser
705
706
def main(args):
  """Parses the command line and runs the requested command.

  Args:
    args: command line arguments, without the program name.

  Returns:
    Process exit code: 0 once a --clean run finished, 1 when a cipd.Error was
    raised, otherwise whatever run_tha_test() returns. Invalid flag
    combinations are reported through parser.error().
  """
  parser = create_option_parser()
  options, args = parser.parse_args(args)

  cache = isolateserver.process_cache_options(options)
  if options.clean:
    # --clean is a standalone mode: reject the flags that only make sense
    # when something is actually executed.
    if options.isolated:
      parser.error('Can\'t use --isolated with --clean.')
    if options.isolate_server:
      parser.error('Can\'t use --isolate-server with --clean.')
    if options.json:
      parser.error('Can\'t use --json with --clean.')
    cache.cleanup()
    return 0
  if not options.no_clean:
    cache.cleanup()

  if not (options.isolated or args):
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  # Work with absolute unicode paths from here on.
  if options.root_dir:
    options.root_dir = unicode(os.path.abspath(options.root_dir))
  if options.json:
    options.json = unicode(os.path.abspath(options.json))

  cipd.validate_cipd_options(parser, options)

  def install_packages_fn(run_dir):
    # The package arguments are parsed when the callback is invoked, not when
    # it is defined.
    return install_packages(
        run_dir, cipd.parse_package_args(options.cipd_packages),
        options.cipd_server, options.cipd_client_package,
        options.cipd_client_version, cache_dir=options.cipd_cache)

  # When an isolated hash is given the positional args are only passed as
  # the extra-args argument below; otherwise they are the command itself.
  command = [] if options.isolated else args

  def launch(storage):
    return run_tha_test(
        command, options.isolated, storage, cache, options.leak_temp_dir,
        options.json, options.root_dir, options.hard_timeout,
        options.grace_period, options.bot_file, args, install_packages_fn,
        options.use_symlinks)

  try:
    if not options.isolate_server:
      return launch(None)
    storage = isolateserver.get_storage(
        options.isolate_server, options.namespace)
    with storage:
      # Hashing schemes used by |storage| and |cache| MUST match.
      assert storage.hash_algo == cache.hash_algo
      return launch(storage)
  except cipd.Error as ex:
    print >> sys.stderr, ex.message
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000771
772
if __name__ == '__main__':
  # Process-wide setup that has to happen before any real work starts.
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  file_path.enable_symlink()
  # Hand main()'s return value to the OS as the exit status.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)