blob: 2f80102a1e96a9ba3932105fb48cc6dcee981c80 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
nodir55be77b2016-05-03 09:39:57 -07008Despite name "run_isolated", can run a generic non-isolated command specified as
9args.
10
11If input isolated hash is provided, fetches it, creates a tree of hard links,
12appends args to the command in the fetched isolated and runs it.
13To improve performance, keeps a local cache.
14The local cache can safely be deleted.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050015
nodirbe642ff2016-06-09 15:51:51 -070016Any ${EXECUTABLE_SUFFIX} on the command line will be replaced with ".exe" string
17on Windows and "" on other platforms.
18
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050019Any ${ISOLATED_OUTDIR} on the command line will be replaced by the location of a
20temporary directory upon execution of the command specified in the .isolated
21file. All content written to this directory will be uploaded upon termination
22and the .isolated file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070023
24Any ${SWARMING_BOT_FILE} on the command line will be replaced by the value of
25the --bot-file parameter. This file is used by a swarming bot to communicate
26state of the host to tasks. It is written to by the swarming bot's
27on_before_task() hook in the swarming server's custom bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028"""
29
Marc-Antoine Rueld704a1f2017-10-31 10:51:23 -040030__version__ = '0.9.3'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000031
aludwin7556e0c2016-10-26 08:46:10 -070032import argparse
maruel064c0a32016-04-05 11:47:15 -070033import base64
iannucci96fcccc2016-08-30 15:52:22 -070034import collections
vadimsh232f5a82017-01-20 19:23:44 -080035import contextlib
aludwin7556e0c2016-10-26 08:46:10 -070036import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000037import logging
38import optparse
39import os
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040import sys
41import tempfile
maruel064c0a32016-04-05 11:47:15 -070042import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000043
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000044from third_party.depot_tools import fix_encoding
45
Vadim Shtayura6b555c12014-07-23 16:22:18 -070046from utils import file_path
maruel12e30012015-10-09 11:55:35 -070047from utils import fs
maruel064c0a32016-04-05 11:47:15 -070048from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040049from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040050from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050051from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000052from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000053from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000054
vadimsh9c54b2c2017-07-25 14:08:29 -070055from libs import luci_context
56
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080057import auth
nodirbe642ff2016-06-09 15:51:51 -070058import cipd
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000059import isolateserver
nodirf33b8d62016-10-26 22:34:58 -070060import named_cache
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000061
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000062
# Absolute path to this file (can be None if running from zip on Mac).
THIS_FILE_PATH = os.path.abspath(
    __file__.decode(sys.getfilesystemencoding())) if __file__ else None

# Directory that contains this file (might be inside zip package). It is derived
# from THIS_FILE_PATH so that a missing __file__ yields None here as well,
# instead of raising while trying to decode it a second time.
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None

# Directory that contains currently running script file.
if zip_package.get_main_script_path():
  MAIN_DIR = os.path.dirname(
      os.path.abspath(zip_package.get_main_script_path()))
else:
  # This happens when 'import run_isolated' is executed at the python
  # interactive prompt, in that case __file__ is undefined.
  MAIN_DIR = None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000079
maruele2f2cb82016-07-13 14:41:03 -070080
81# Magic variables that can be found in the isolate task command line.
82ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
83EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
84SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'
85
86
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000087# The name of the log file to use.
88RUN_ISOLATED_LOG_FILE = 'run_isolated.log'
89
maruele2f2cb82016-07-13 14:41:03 -070090
csharp@chromium.orge217f302012-11-22 16:51:53 +000091# The name of the log to use for the run_test_cases.py command
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000092RUN_TEST_CASES_LOG = 'run_test_cases.log'
csharp@chromium.orge217f302012-11-22 16:51:53 +000093
vadimsh@chromium.org87d63262013-04-04 19:34:21 +000094
maruele2f2cb82016-07-13 14:41:03 -070095# Use short names for temporary directories. This is driven by Windows, which
96# imposes a relatively short maximum path length of 260 characters, often
97# referred to as MAX_PATH. It is relatively easy to create files with longer
# path length. A use case is with recursive dependency trees like npm packages.
99#
100# It is recommended to start the script with a `root_dir` as short as
101# possible.
102# - ir stands for isolated_run
103# - io stands for isolated_out
104# - it stands for isolated_tmp
105ISOLATED_RUN_DIR = u'ir'
106ISOLATED_OUT_DIR = u'io'
107ISOLATED_TMP_DIR = u'it'
108
109
marueld928c862017-06-08 08:20:04 -0700110OUTLIVING_ZOMBIE_MSG = """\
111*** Swarming tried multiple times to delete the %s directory and failed ***
112*** Hard failing the task ***
113
114Swarming detected that your testing script ran an executable, which may have
115started a child executable, and the main script returned early, leaving the
116children executables playing around unguided.
117
118You don't want to leave children processes outliving the task on the Swarming
119bot, do you? The Swarming bot doesn't.
120
121How to fix?
122- For any process that starts children processes, make sure all children
123 processes terminated properly before each parent process exits. This is
124 especially important in very deep process trees.
125 - This must be done properly both in normal successful task and in case of
126 task failure. Cleanup is very important.
127- The Swarming bot sends a SIGTERM in case of timeout.
128 - You have %s seconds to comply after the signal was sent to the process
129 before the process is forcibly killed.
130- To achieve not leaking children processes in case of signals on timeout, you
131 MUST handle signals in each executable / python script and propagate them to
132 children processes.
133 - When your test script (python or binary) receives a signal like SIGTERM or
134 CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for
135 them to terminate before quitting.
136
137See
138https://github.com/luci/luci-py/blob/master/appengine/swarming/doc/Bot.md#graceful-termination-aka-the-sigterm-and-sigkill-dance
139for more information.
140
141*** May the SIGKILL force be with you ***
142"""
143
144
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this script and the modules it depends on.

  When |executable| is True, this file is added under the name __main__.py so
  that python can run the resulting zip package directly; otherwise no archive
  name override is passed.
  """
  # Packaging from inside another zip package is unsupported and probably
  # unneeded.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  bundle = zip_package.ZipPackage(root=BASE_DIR)
  archive_name = '__main__.py' if executable else None
  bundle.add_python_file(THIS_FILE_PATH, archive_name)
  # Sibling modules, added in a fixed order.
  sibling_modules = (
      'isolate_storage.py',
      'isolated_format.py',
      'isolateserver.py',
      'auth.py',
      'cipd.py',
      'named_cache.py',
  )
  for module_file in sibling_modules:
    bundle.add_python_file(os.path.join(BASE_DIR, module_file))
  # Whole directories of supporting code.
  for subdir in ('libs', 'third_party', 'utils'):
    bundle.add_directory(os.path.join(BASE_DIR, subdir))
  return bundle
168
169
def make_temp_dir(prefix, root_dir):
  """Creates a new unique temporary directory and returns its path as text.

  Args:
    prefix: leading part of the name of the directory to create.
    root_dir: directory in which to create it; None lets tempfile.mkdtemp()
        pick its default location.

  Returns:
    The path of the new directory as a text (unicode) string.
  """
  path = tempfile.mkdtemp(prefix=prefix, dir=root_dir)
  if isinstance(path, bytes):
    # Decode with the filesystem encoding (as done for __file__ in this module)
    # rather than the implicit ASCII codec, which fails on non-ASCII paths.
    path = path.decode(sys.getfilesystemencoding())
  return path
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000173
174
def change_tree_read_only(rootdir, read_only):
  """Applies the requested read-only policy to the tree under |rootdir|.

  |read_only| selects the policy:
    2: files and directories (except on Windows) are marked read only. This
       inhibits modifying, creating or deleting files in the test directory,
       except on Windows where creating and deleting files is still possible.
    1: files are marked read only but not the directories. This inhibits
       modifying files but creating or deleting files is still possible.
    0 or None: anything can be modified.

  Raises:
    ValueError if |read_only| is none of the values above.
  """
  if read_only == 2:
    apply_policy = file_path.make_tree_read_only
  elif read_only == 1:
    apply_policy = file_path.make_tree_files_read_only
  elif read_only in (0, None):
    # TODO(maruel): This is currently dangerous as long as DiskCache.touch()
    # is not yet changed to verify the hash of the content of the files it is
    # looking at, so that if a test modifies an input file, the file must be
    # deleted.
    apply_policy = file_path.make_tree_writeable
  else:
    raise ValueError(
        'change_tree_read_only(%s, %s): Unknown flag %s' %
        (rootdir, read_only, read_only))
  apply_policy(rootdir)
201
202
@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
  """Context manager that makes |account| the default LUCI_CONTEXT account.

  The body runs with the context unchanged when any of these holds:
  - |account| is None or '' (run_isolated.py was called without the
    '--switch-to-account' flag); the task then inherits whatever account is
    already set in the surrounding LUCI_CONTEXT, if any.
  - LUCI_CONTEXT has no 'local_auth' section, i.e. LUCI_CONTEXT-based auth is
    not in use.
  - |account| is already the default account.

  Otherwise a modified 'local_auth' section is written for the duration of the
  body. If |account| is not among the accounts listed in the context, the
  default account is removed, which switches to non-authenticated access. This
  happens for Swarming tasks that don't use 'task' service accounts.
  """
  # Only look at the context when a switch was actually requested.
  auth_section = luci_context.read('local_auth') if account else None

  # See LUCI_CONTEXT.md for the format of 'local_auth'.
  if not auth_section or auth_section.get('default_account_id') == account:
    yield
    return

  known_ids = {entry['id'] for entry in auth_section.get('accounts') or []}
  if account not in known_ids:
    logging.warning(
        'Requested LUCI_CONTEXT account %r is not available (have only %r), '
        'disabling authentication', account, sorted(known_ids))
    auth_section.pop('default_account_id', None)
  else:
    logging.info('Switching default LUCI_CONTEXT account to %r', account)
    auth_section['default_account_id'] = account

  with luci_context.write(_tmpdir=tmp_dir, local_auth=auth_section):
    yield
249
250
def process_command(command, out_dir, bot_file):
  """Expands the magic variables found in a command line.

  Returns a new list in which every argument had ${EXECUTABLE_SUFFIX},
  ${ISOLATED_OUTDIR} and ${SWARMING_BOT_FILE} substituted. A
  ${SWARMING_BOT_FILE} without |bot_file| is left untouched and only logged.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is used in |command| but |out_dir| is not
    provided.
  """
  def expand(token):
    expanded = token.replace(
        EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
    path_substituted = False

    if ISOLATED_OUTDIR_PARAMETER in expanded:
      if not out_dir:
        raise ValueError(
            'output directory is requested in command, but not provided; '
            'please specify one')
      expanded = expanded.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
      path_substituted = True

    if SWARMING_BOT_FILE_PARAMETER in expanded:
      if not bot_file:
        logging.warning('SWARMING_BOT_FILE_PARAMETER found in command, but no '
                        'bot_file specified. Leaving parameter unchanged.')
      else:
        expanded = expanded.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
        path_substituted = True

    if not path_substituted:
      return expanded
    # Slashes are converted only when a path parameter was substituted,
    # because of arguments like '${ISOLATED_OUTDIR}/foo/bar'.
    return expanded.replace('/', os.sep)

  return list(map(expand, command))
282
283
def get_command_env(tmp_dir, cipd_info):
  """Builds the complete OS environment for the command to run.

  Starts from a copy of the current environment, points the temp directory
  variable at |tmp_dir| and, when a cipd client is used, prepends the directory
  of its binary to PATH and exposes CIPD_CACHE_DIR.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.
  """
  def as_native(value):
    # Leave str values untouched; anything else is encoded with the filesystem
    # encoding.
    if not isinstance(value, str):
      return value.encode(sys.getfilesystemencoding())
    return value

  environ = os.environ.copy()

  # TMPDIR is specified as the POSIX standard envvar for the temp directory.
  #   * mktemp on linux respects $TMPDIR, not $TMP
  #   * mktemp on OS X SOMETIMES respects $TMPDIR
  #   * chromium's base utils respects $TMPDIR on linux, $TEMP on windows.
  #     Unfortunately at the time of writing it completely ignores all envvars
  #     on OS X.
  #   * python respects TMPDIR, TEMP, and TMP (regardless of platform)
  #   * golang respects TMPDIR on linux+mac, TEMP on windows.
  temp_var = 'TEMP' if sys.platform == 'win32' else 'TMPDIR'
  environ[temp_var] = as_native(tmp_dir)

  if not cipd_info:
    return environ

  client_dir = os.path.dirname(cipd_info.client.binary_path)
  environ['PATH'] = os.pathsep.join([as_native(client_dir), environ['PATH']])
  environ['CIPD_CACHE_DIR'] = as_native(cipd_info.cache_dir)
  return environ
318
319
def run_command(command, cwd, env, hard_timeout, grace_period):
  """Runs the command, enforcing the hard timeout and the grace period.

  The process is started detached. If it is still running after |hard_timeout|
  seconds, or if one of subprocess42.STOP_SIGNALS is received while waiting, it
  is sent terminate(); if it is still running |grace_period| seconds later it
  is sent kill(). A falsy timeout or grace period means waiting without limit.

  An OSError while starting or waiting for the process is reported on stderr
  and turned into exit code 1 rather than being raised.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)' % (command, cwd))

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list, so that the nested handler below can record that a signal was
    # received by mutating it.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def handler(signum, _frame):
        # Only the first signal received after the process was started is
        # acted upon; it is surfaced as a TimeoutExpired, which the wait below
        # catches.
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          # Either the hard timeout expired or handler() raised on a signal;
          # only the former is reported as a hard timeout.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
        logging.info('Waiting for process exit')
        exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist or a dependent library is missing>\n'
          '<Check for missing .so/.dll in the .isolate or GN file>\n'
          '<Command: %s>\n' % command)
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  # The exit code is also logged as an unsigned 32 bits hexadecimal value.
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700389
390
def fetch_and_map(isolated_hash, storage, cache, outdir, use_symlinks):
  """Fetches an isolated tree into |outdir| and returns (bundle, stats).

  |stats| is a dict holding the fetch duration in seconds, the initial number
  of items and size reported by |cache|, and the packed, base64 encoded
  'items_cold' (cache.added) and 'items_hot' (cache.used minus cache.added).
  """
  began = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=use_symlinks)

  stats = {'duration': time.time() - began}
  stats['initial_number_items'] = cache.initial_number_items
  stats['initial_size'] = cache.initial_size
  cold_items = sorted(cache.added)
  stats['items_cold'] = base64.b64encode(large.pack(cold_items))
  hot_items = sorted(set(cache.used) - set(cache.added))
  stats['items_hot'] = base64.b64encode(large.pack(hot_items))
  return bundle, stats
408
409
def link_outputs_to_outdir(run_dir, out_dir, outputs):
  """Makes each named output from |run_dir| available under |out_dir|.

  This is done so the outputs can be uploaded. Symlinks are copied, anything
  else is linked with a hard link with fallback. An OSError on an individual
  output is logged and that output is skipped. Does nothing when |outputs| is
  empty or None.
  """
  if not outputs:
    return
  isolateserver.create_directories(out_dir, outputs)
  for relpath in outputs:
    source = os.path.join(run_dir, relpath)
    destination = os.path.join(out_dir, relpath)
    try:
      if not fs.islink(source):
        file_path.link_file(
            destination, source, file_path.HARDLINK_WITH_FALLBACK)
      else:
        # TODO(aludwin): handle directories
        fs.copy2(source, destination)
    except OSError as e:
      logging.info("Couldn't collect output file %s: %s", relpath, e)
aludwin0a8e17d2016-10-27 15:57:39 -0700429
430
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Uploads the content of |out_dir| back, then deletes the directory.

  The directory is archived only if it exists and is not empty. It is deleted
  afterwards unless |leak_temp_dir| is set.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
      server, if applicable.
    - success: False if something occurred that means that the task must
      forcibly be considered a failure, e.g. zombie processes were left
      behind.
    - stats: uploading stats.
  """
  outputs_ref = None
  cold_sizes = []
  hot_sizes = []
  began = time.time()

  # Generate a .isolated file out of out_dir, only if files were written in it.
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold_sizes = sorted(item.size for item in f_cold)
        hot_sizes = sorted(item.size for item in f_hot)
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading data.
        # There is 2 causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    # Nothing to remove, or the removal worked: either way it is a success.
    if leak_temp_dir or not fs.isdir(out_dir) or file_path.rmtree(out_dir):
      success = True
    else:
      logging.error('Had difficulties removing out_dir %s', out_dir)
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)

  stats = {
    'duration': time.time() - began,
    'items_cold': base64.b64encode(large.pack(cold_sizes)),
    'items_hot': base64.b64encode(large.pack(hot_sizes)),
  }
  return outputs_ref, success, stats
maruela9cfd6f2015-09-15 11:03:15 -0700489
490
def map_and_run(
    command, isolated_hash, storage, isolate_cache, outputs,
    install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
    bot_file, switch_to_account, install_packages_fn, use_symlinks, raw_cmd,
    constant_run_path):
  """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  The steps are: create the run, output and temp directories, install the
  packages, fetch the isolated tree if |isolated_hash| is set, run the command,
  then clean up the directories and upload the content of the output directory.

  Any exception raised before the cleanup is caught and reported through the
  'internal_failure' key instead of being raised; the same applies to
  exceptions raised by the cleanup itself.

  Returns metadata about the result, as a dict with the keys 'duration',
  'exit_code', 'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref',
  'version' and, when a cipd client was used, 'cipd_pins'.
  """
  assert isinstance(command, list), command
  assert root_dir or root_dir is None
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
    # 'isolated': {
    #    'cipd': {
    #      'duration': 0.,
    #      'get_client_duration': 0.,
    #    },
    #    'download': {
    #      'duration': 0.,
    #      'initial_number_items': 0,
    #      'initial_size': 0,
    #      'items_cold': '<large.pack()>',
    #      'items_hot': '<large.pack()>',
    #    },
    #    'upload': {
    #      'duration': 0.,
    #      'items_cold': '<large.pack()>',
    #      'items_hot': '<large.pack()>',
    #    },
    #  },
    },
    # 'cipd_pins': {
    #   'packages': [
    #     {'package_name': ..., 'version': ..., 'path': ...},
    #     ...
    #   ],
    #  'client_package': {'package_name': ..., 'version': ...},
    # },
    'outputs_ref': None,
    'version': 5,
  }

  if root_dir:
    # 0o700 is the octal literal form accepted by both python 2.6+ and 3.
    file_path.ensure_tree(root_dir, 0o700)
  elif isolate_cache.cache_dir:
    root_dir = os.path.dirname(isolate_cache.cache_dir)
  # See comment for these constants.
  # If root_dir is not specified, it is not constant.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and root_dir:
    run_dir = os.path.join(root_dir, ISOLATED_RUN_DIR)
    # Start from an empty directory.
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
  cwd = run_dir

  try:
    with install_packages_fn(run_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      if isolated_hash:
        isolated_stats = result['stats'].setdefault('isolated', {})
        bundle, isolated_stats['download'] = fetch_and_map(
            isolated_hash=isolated_hash,
            storage=storage,
            cache=isolate_cache,
            outdir=run_dir,
            use_symlinks=use_symlinks)
        change_tree_read_only(run_dir, bundle.read_only)
        # Inject the command
        if not raw_cmd and bundle.command:
          command = bundle.command + command
          # Only set the relative directory if the isolated file specified a
          # command, and no raw command was specified.
          if bundle.relative_cwd:
            cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        return result

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if storage and outputs:
        isolateserver.create_directories(run_dir, outputs)

      command = tools.fix_python_path(command)
      command = process_command(command, out_dir, bot_file)
      file_path.ensure_command_has_abs_path(command, cwd)

      with install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(switch_to_account, tmp_dir):
            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, get_command_env(tmp_dir, cipd_info),
                hard_timeout, grace_period)
        finally:
          # Recorded even if run_command() raised; never negative.
          result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    # Clean up. This also runs on the early return above.
    try:
      # Try to link files to the output directory, if specified.
      if out_dir:
        link_outputs_to_outdir(run_dir, out_dir, outputs)

      success = False
      if leak_temp_dir:
        success = True
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows rmtree(run_dir) call below has a synchronization effect: it
        # finishes only when all task child processes terminate (since a running
        # process locks *.exe file). Examine out_dir only after that call
        # completes (since child processes may write to out_dir too and we need
        # to wait for them to finish).
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            # A directory that cannot be deleted turns a successful task into
            # a failed one.
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('run', grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('temp', grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(storage, out_dir, leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500672
673
def run_tha_test(
    command, isolated_hash, storage, isolate_cache, outputs,
    install_named_caches, leak_temp_dir, result_json, root_dir, hard_timeout,
    grace_period, bot_file, switch_to_account, install_packages_fn,
    use_symlinks, raw_cmd):
  """Runs an executable via map_and_run() and reports the outcome.

  Either command or isolated_hash must be specified.

  When isolated_hash is given, the dependencies are downloaded into the cache,
  hardlinked into a temporary directory and the command found in the .isolated
  is run.

  Output files are collected in a temporary directory whose content is
  uploaded back to |storage| packaged as a .isolated file.

  Arguments:
    command: a list of string; the command to run OR optional arguments to add
             to the command stated in the .isolated file if a command was
             specified.
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
                   The command specified in the .isolated is executed.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    isolate_cache: an isolateserver.LocalCache to keep from retrieving the
                   same objects constantly by caching the objects retrieved.
                   Can be on-disk or in-memory.
    outputs: list of paths relative to root_dir to put into the output isolated
             bundle upon task completion (see link_outputs_to_outdir).
    install_named_caches: a function (run_dir) => context manager that installs
                          named caches into |run_dir|.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the returned
                 exit code is 0 unless an internal error occurred.
    root_dir: path to the directory to use to create the temporary directory. If
              not specified, a random temporary directory is created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    bot_file: path to a file with bot state, used in place of
              ${SWARMING_BOT_FILE} task command line argument.
    switch_to_account: a logical account to switch LUCI_CONTEXT into.
    install_packages_fn: context manager dir => CipdInfo, see
                         install_client_and_packages.
    use_symlinks: create tree with symlinks instead of hardlinks.
    raw_cmd: ignore the command in the isolated file.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Leave a pessimistic record on disk before doing anything, so there is
    # something to read even if this process gets killed half way through.
    placeholder = {
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'Was terminated before completion',
      'outputs_ref': None,
      'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  result = map_and_run(
      command, isolated_hash, storage, isolate_cache, outputs,
      install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
      bot_file, switch_to_account, install_packages_fn, use_symlinks, raw_cmd,
      True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  if not result_json:
    # No json file requested: marshall into old-style inline output and let
    # the exit code reflect the task itself.
    ref = result['outputs_ref']
    if ref:
      hack = {
        'hash': ref['isolated'],
        'namespace': ref['namespace'],
        'storage': ref['isolatedserver'],
      }
      sys.stdout.flush()
      print(
          '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
          tools.format_json(hack, dense=True))
      sys.stdout.flush()
    return result['exit_code'] or int(bool(result['internal_failure']))

  # We've found tests to delete 'work' when quitting, which made the write
  # below raise. Recreate the directory if necessary.
  file_path.ensure_tree(os.path.dirname(result_json))
  tools.write_json(result_json, result, dense=True)
  # With a json file, only an internal error makes the exit code non-zero.
  return int(bool(result['internal_failure']))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000769
770
# Value yielded by 'install_client_and_packages'. Fields, in order:
#   client: cipd.CipdClient object.
#   cache_dir: absolute path to bot-global cipd tag and instance cache.
#   stats: dict with stats to return to the server.
#   pins: dict with installed cipd pins to return to the server.
CipdInfo = collections.namedtuple(
    'CipdInfo', ('client', 'cache_dir', 'stats', 'pins'))
778
779
@contextlib.contextmanager
def noop_install_packages(_run_dir):
  """Stand-in for 'install_client_and_packages' used when cipd is disabled.

  The argument is ignored and None is yielded.
  """
  yield
784
785
iannuccib58d10d2017-03-18 02:00:25 -0700786def _install_packages(run_dir, cipd_cache_dir, client, packages, timeout):
787 """Calls 'cipd ensure' for packages.
788
789 Args:
790 run_dir (str): root of installation.
791 cipd_cache_dir (str): the directory to use for the cipd package cache.
792 client (CipdClient): the cipd client to use
793 packages: packages to install, list [(path, package_name, version), ...].
794 timeout: max duration in seconds that this function can take.
795
796 Returns: list of pinned packages. Looks like [
797 {
798 'path': 'subdirectory',
799 'package_name': 'resolved/package/name',
800 'version': 'deadbeef...',
801 },
802 ...
803 ]
804 """
805 package_pins = [None]*len(packages)
806 def insert_pin(path, name, version, idx):
807 package_pins[idx] = {
808 'package_name': name,
809 # swarming deals with 'root' as '.'
810 'path': path or '.',
811 'version': version,
812 }
813
814 by_path = collections.defaultdict(list)
815 for i, (path, name, version) in enumerate(packages):
816 # cipd deals with 'root' as ''
817 if path == '.':
818 path = ''
819 by_path[path].append((name, version, i))
820
821 pins = client.ensure(
822 run_dir,
823 {
824 subdir: [(name, vers) for name, vers, _ in pkgs]
825 for subdir, pkgs in by_path.iteritems()
826 },
827 cache_dir=cipd_cache_dir,
828 timeout=timeout,
829 )
830
831 for subdir, pin_list in sorted(pins.iteritems()):
832 this_subdir = by_path[subdir]
833 for i, (name, version) in enumerate(pin_list):
834 insert_pin(subdir, name, version, this_subdir[i][2])
835
836 assert None not in package_pins
837
838 return package_pins
839
840
@contextlib.contextmanager
def install_client_and_packages(
    run_dir, packages, service_url, client_package_name,
    client_version, cache_dir, timeout=None):
  """Bootstraps CIPD client and installs CIPD packages.

  Yields a single CipdInfo object carrying the CipdClient, the cipd cache
  directory, stats and pins.

  The package pins are in the form of:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  and the CIPD client info is a single dictionary with "package_name" and
  "version" keys.

  The package pins correspond 1:1 to all input package arguments from the
  command line. These dictionaries make their way all the way back to swarming,
  where they become the arguments of CipdPackage.

  If 'packages' list is empty, will bootstrap CIPD client, but won't install
  any packages.

  The bootstrapped client (regardless whether 'packages' list is empty or not),
  will be made available to the task via $PATH.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
      "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    timeout: max duration in seconds that this function can take.
  """
  assert cache_dir

  # One timeout budget is shared by the client bootstrap and the package
  # installation; each call to remaining() supplies the value for the next
  # step.
  remaining = tools.sliding_timeout(timeout)
  began = time.time()

  cache_dir = os.path.abspath(cache_dir)
  # Tag and instance caches live in a subdirectory of the client cache.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')
  run_dir = os.path.abspath(run_dir)
  if not packages:
    packages = []

  client_began = time.time()
  with cipd.get_client(
      service_url, client_package_name, client_version, cache_dir,
      timeout=remaining()) as client:
    get_client_duration = time.time() - client_began

    if packages:
      package_pins = _install_packages(
          run_dir, cipd_cache_dir, client, packages, remaining())
    else:
      package_pins = []

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - began
    logging.info(
        'Installing CIPD client and packages took %d seconds', total_duration)

    stats = {
      'duration': total_duration,
      'get_client_duration': get_client_duration,
    }
    pins = {
      'client_package': {
        'package_name': client.package_name,
        'version': client.instance_id,
      },
      'packages': package_pins,
    }
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -0700921
922
def clean_caches(options, isolate_cache, named_cache_manager):
  """Trims isolated and named caches.

  The goal here is to coherently trim both caches, deleting older items
  independent of which container they belong to.

  Args:
    options: parsed command line options; only min_free_space is read, and it
             is forwarded to the named cache trim.
    isolate_cache: isolated cache; must provide get_oldest(), get_timestamp(),
                   trim() and cleanup().
    named_cache_manager: named cache manager; must provide open(),
                         get_oldest(), get_timestamp() and trim().

  Returns:
    The sum of the values returned by the two trim calls.
  """
  # TODO(maruel): Trim CIPD cache the same way.
  total = 0
  with named_cache_manager.open():
    oldest_isolated = isolate_cache.get_oldest()
    oldest_named = named_cache_manager.get_oldest()
    # Pairs of (trim callable, timestamp of the oldest item). A cache without
    # an oldest item gets timestamp 0, which sorts it first.
    trimmers = [
      (
        isolate_cache.trim,
        isolate_cache.get_timestamp(oldest_isolated) if oldest_isolated else 0,
      ),
      (
        lambda: named_cache_manager.trim(options.min_free_space),
        named_cache_manager.get_timestamp(oldest_named) if oldest_named else 0,
      ),
    ]
    # Sort on the timestamp only. Indexing is used instead of a tuple
    # parameter because 'lambda (_, ts): ts' is not valid syntax on Python 3.
    trimmers.sort(key=lambda trimmer: trimmer[1])
    # TODO(maruel): This is incorrect, we want to trim 'items' that are strictly
    # the oldest independent of in which cache they live in. Right now, the
    # cache with the oldest item pays the price.
    for trim, _ in trimmers:
      total += trim()
  isolate_cache.cleanup()
  return total
nodirf33b8d62016-10-26 22:34:58 -0700952
953
def create_option_parser():
  """Builds the command line option parser for this script.

  Registers the options defined here, then lets the isolateserver, cipd,
  named_cache and auth modules add theirs, and finally sets the default cache
  locations.

  Returns:
    The configured logging_utils.OptionParserWithLogging instance.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)
  parser.add_option(
      '--clean', action='store_true',
      help='Cleans the cache, trimming it if necessary and removing corrupted '
           'items, and returns without executing anything; use with -v to '
           'know what was done')
  parser.add_option(
      '--no-clean', action='store_true',
      help='Do not clean the cache automatically on startup. This is meant for '
           'bots where a separate execution with --clean was done earlier so '
           'doing it again is redundant')
  parser.add_option(
      '--use-symlinks', action='store_true',
      help='Use symlinks instead of hardlinks')
  parser.add_option(
      '--json',
      help='dump output metadata to json file. When used, run_isolated returns '
           'non-zero only on internal failure')
  parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  parser.add_option(
      '--grace-period', type='float',
      help='Grace period between SIGTERM and SIGKILL')
  parser.add_option(
      '--raw-cmd', action='store_true',
      help='Ignore the isolated command, use the one supplied at the command '
           'line')
  parser.add_option(
      '--bot-file',
      help='Path to a file describing the state of the host. The content is '
           'defined by on_before_task() in bot_config.')
  parser.add_option(
      '--switch-to-account',
      help='If given, switches LUCI_CONTEXT to given logical service account '
           '(e.g. "task" or "system") before launching the isolated process.')
  parser.add_option(
      '--output', action='append',
      # The placeholder is spelled ${ISOLATED_OUTDIR}, exactly as it has to be
      # written on the command line.
      help='Specifies an output to return. If no outputs are specified, all '
           'files located in ${ISOLATED_OUTDIR} will be returned; '
           'otherwise, outputs in both ${ISOLATED_OUTDIR} and those '
           'specified by --output option (there can be multiple) will be '
           'returned. Note that if a file in OUT_DIR has the same path '
           'as an --output option, the --output version will be returned.')
  parser.add_option(
      '-a', '--argsfile',
      # This is actually handled in parse_args; it's included here purely so it
      # can make it into the help text.
      help='Specify a file containing a JSON array of arguments to this '
           'script. If --argsfile is provided, no other argument may be '
           'provided on the command line.')
  data_group = optparse.OptionGroup(parser, 'Data source')
  data_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(data_group)
  parser.add_option_group(data_group)

  isolateserver.add_cache_options(parser)

  cipd.add_cipd_options(parser)
  named_cache.add_named_cache_options(parser)

  debug_group = optparse.OptionGroup(parser, 'Debugging')
  debug_group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
           'Default: %default')
  debug_group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(debug_group)

  auth.add_auth_options(parser)

  parser.set_defaults(
      cache='cache',
      cipd_cache='cipd_cache',
      named_cache_root='named_caches')
  return parser
1037
1038
def parse_args(args):
  """Parses the command line, reading it from --argsfile when one is given.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    Tuple (parser, options, args): the parser built by create_option_parser()
    and the options and positional arguments it produced.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser.
  # This is because --argsfile is exclusive with all other options and
  # arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      file_argparse.error(
          'Can\'t specify --argsfile with any other arguments (%s)' %
          nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        args = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # We don't error out here: "args" keeps the value it was called with and
      # is handed to the normal parser below.
      sys.stderr.write('Couldn\'t read arguments: %s\n' % e)

  # Even if we failed to read the args, just call the normal parser now.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  return (parser, options, args)
1065
1066
def main(args):
  """Parses the command line, maintains the caches and runs the task.

  Invalid option combinations are reported through parser.error().

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    0 right after cleaning the caches when --clean is passed; otherwise the
    value returned by run_tha_test(), or 1 if a cipd.Error or
    named_cache.Error is raised.
  """
  (parser, options, args) = parse_args(args)

  if not file_path.enable_symlink():
    logging.error('Symlink support is not enabled')

  isolate_cache = isolateserver.process_cache_options(options, trim=False)
  named_cache_manager = named_cache.process_named_cache_options(parser, options)
  if options.clean:
    if options.isolated:
      parser.error('Can\'t use --isolated with --clean.')
    if options.isolate_server:
      parser.error('Can\'t use --isolate-server with --clean.')
    if options.json:
      parser.error('Can\'t use --json with --clean.')
    if options.named_caches:
      parser.error('Can\'t use --named-cache with --clean.')
    clean_caches(options, isolate_cache, named_cache_manager)
    return 0

  if not options.no_clean:
    clean_caches(options, isolate_cache, named_cache_manager)

  if not options.isolated and not args:
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = unicode(os.path.abspath(options.root_dir))
  if options.json:
    options.json = unicode(os.path.abspath(options.json))

  cipd.validate_cipd_options(parser, options)

  install_packages_fn = noop_install_packages
  if options.cipd_enabled:
    install_packages_fn = lambda run_dir: install_client_and_packages(
        run_dir, cipd.parse_package_args(options.cipd_packages),
        options.cipd_server, options.cipd_client_package,
        options.cipd_client_version, cache_dir=options.cipd_cache)

  @contextlib.contextmanager
  def install_named_caches(run_dir):
    # WARNING: this function depends on "options" variable defined in the outer
    # function.
    caches = [
      (os.path.join(run_dir, unicode(relpath)), name)
      for name, relpath in options.named_caches
    ]
    with named_cache_manager.open():
      for path, name in caches:
        named_cache_manager.install(path, name)
    try:
      yield
    finally:
      # Uninstall each named cache, returning it to the cache pool. If an
      # uninstall fails for a given cache, it will remain in the task's
      # temporary space, get cleaned up by the Swarming bot, and be lost.
      #
      # If the Swarming bot cannot clean up the cache, it will handle it like
      # any other bot file that could not be removed.
      with named_cache_manager.open():
        for path, name in caches:
          try:
            named_cache_manager.uninstall(path, name)
          except named_cache.Error:
            # Arguments follow the order of the message: cache name first,
            # then the path it was installed at.
            logging.exception('Error while removing named cache %r at %r. '
                              'The cache will be lost.', name, path)

  def run(storage):
    # Single place where run_tha_test() is called; only |storage| differs
    # between the isolate-server and the no-server cases.
    return run_tha_test(
        args,
        options.isolated,
        storage,
        isolate_cache,
        options.output,
        install_named_caches,
        options.leak_temp_dir,
        options.json,
        options.root_dir,
        options.hard_timeout,
        options.grace_period,
        options.bot_file,
        options.switch_to_account,
        install_packages_fn,
        options.use_symlinks,
        options.raw_cmd)

  try:
    if options.isolate_server:
      storage = isolateserver.get_storage(
          options.isolate_server, options.namespace)
      with storage:
        # Hashing schemes used by |storage| and |isolate_cache| MUST match.
        assert storage.hash_algo == isolate_cache.hash_algo
        return run(storage)
    return run(None)
  except (cipd.Error, named_cache.Error) as ex:
    print >> sys.stderr, ex.message
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001189
1190
if __name__ == '__main__':
  # Script entry point.
  subprocess42.inhibit_os_error_reporting()
  # Make sure the correct encoding is in effect before main() runs.
  fix_encoding.fix_encoding()
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -05001195 sys.exit(main(sys.argv[1:]))