blob: 7daa9f2a700e41e40a2bc184633315b872049578 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
run_isolated takes care of setting up a temporary environment, running a
command, and tearing it down.
nodir55be77b2016-05-03 09:39:57 -070010
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000011It handles downloading and uploading isolated files, mapping CIPD packages and
12reusing stateful named caches.
13
14The isolated files, CIPD packages and named caches are kept as a global LRU
15cache.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050016
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000017Any ${EXECUTABLE_SUFFIX} on the command line or the environment variables passed
18with the --env option will be replaced with ".exe" string on Windows and "" on
19other platforms.
nodirbe642ff2016-06-09 15:51:51 -070020
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000021Any ${ISOLATED_OUTDIR} on the command line or the environment variables passed
22with the --env option will be replaced by the location of a temporary directory
23upon execution of the command specified in the .isolated file. All content
24written to this directory will be uploaded upon termination and the .isolated
25file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070026
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000027Any ${SWARMING_BOT_FILE} on the command line or the environment variables passed
28with the --env option will be replaced by the value of the --bot-file parameter.
29This file is used by a swarming bot to communicate state of the host to tasks.
30It is written to by the swarming bot's on_before_task() hook in the swarming
31server's custom bot_config.py.
32
33See
34https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Magic-Values.md
35for all the variables.
36
37See
38https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/swarming_bot/config/bot_config.py
39for more information about bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040"""
41
Marc-Antoine Ruelf899c482019-10-10 23:32:06 +000042from __future__ import print_function
43
44__version__ = '1.0.1'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000045
aludwin7556e0c2016-10-26 08:46:10 -070046import argparse
maruel064c0a32016-04-05 11:47:15 -070047import base64
iannucci96fcccc2016-08-30 15:52:22 -070048import collections
vadimsh232f5a82017-01-20 19:23:44 -080049import contextlib
Sadaf Matinkhoo10743a62018-03-29 16:28:58 -040050import errno
aludwin7556e0c2016-10-26 08:46:10 -070051import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000052import logging
53import optparse
54import os
Takuto Ikuta5c59a842020-01-24 03:05:24 +000055import platform
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040056import re
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000057import sys
58import tempfile
maruel064c0a32016-04-05 11:47:15 -070059import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000060
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000061from utils import tools
62tools.force_local_third_party()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000063
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000064# third_party/
65from depot_tools import fix_encoding
Takuto Ikuta6e2ff962019-10-29 12:35:27 +000066import six
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000067
68# pylint: disable=ungrouped-imports
69import auth
70import cipd
71import isolate_storage
72import isolateserver
73import local_caching
74from libs import luci_context
Vadim Shtayura6b555c12014-07-23 16:22:18 -070075from utils import file_path
maruel12e30012015-10-09 11:55:35 -070076from utils import fs
maruel064c0a32016-04-05 11:47:15 -070077from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040078from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040079from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050080from utils import subprocess42
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000081
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000082
# Magic variables that can be found in the isolate task command line.
# Each one is a literal token that is substituted before the command runs.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Use short names for temporary directories. This is driven by Windows, which
# imposes a relatively short maximum path length of 260 characters, often
# referred to as MAX_PATH. It is relatively easy to create files with longer
# path length. A use case is with recursive dependency trees like npm packages.
#
# It is recommended to start the script with a `root_dir` as short as
# possible.
# - ir stands for isolated_run
# - io stands for isolated_out
# - it stands for isolated_tmp
# - ic stands for isolated_client
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
ISOLATED_CLIENT_DIR = u'ic'

# CIPD package name and pinned version of the Go isolated client.
# TODO(tikuta): take these parameters from luci-config?
# Take revision from
# https://ci.chromium.org/p/infra-internal/g/infra-packagers/console
ISOLATED_PACKAGE = 'infra/tools/luci/isolated/${platform}'
ISOLATED_REVISION = 'git_revision:998a75a97562b8c40feec956be8b4274d96e2eea'

# Valid cache names are 1 to 4096 characters drawn from [a-z0-9_].
# Keep synced with task_request.py
CACHE_NAME_RE = re.compile(r'^[a-z0-9_]{1,4096}$')
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400121
122
# Error message template with two '%s' placeholders, in this order: the
# directory that could not be deleted, then the number of seconds a process is
# given after the termination signal before it is forcibly killed.
OUTLIVING_ZOMBIE_MSG = """\
*** Swarming tried multiple times to delete the %s directory and failed ***
*** Hard failing the task ***

Swarming detected that your testing script ran an executable, which may have
started a child executable, and the main script returned early, leaving the
children executables playing around unguided.

You don't want to leave children processes outliving the task on the Swarming
bot, do you? The Swarming bot doesn't.

How to fix?
- For any process that starts children processes, make sure all children
  processes terminated properly before each parent process exits. This is
  especially important in very deep process trees.
  - This must be done properly both in normal successful task and in case of
    task failure. Cleanup is very important.
- The Swarming bot sends a SIGTERM in case of timeout.
  - You have %s seconds to comply after the signal was sent to the process
    before the process is forcibly killed.
- To achieve not leaking children processes in case of signals on timeout, you
  MUST handle signals in each executable / python script and propagate them to
  children processes.
  - When your test script (python or binary) receives a signal like SIGTERM or
    CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for
    them to terminate before quitting.

See
https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#Graceful-termination_aka-the-SIGTERM-and-SIGKILL-dance
for more information.

*** May the SIGKILL force be with you ***
"""
156
157
# Currently hardcoded. Eventually could be exposed as a flag once there's value.
# 3 weeks, expressed in seconds (21 days * 24 h * 60 min * 60 s).
MAX_AGE_SECS = 21*24*60*60
161
162
# Immutable bundle of everything needed to run one task. Fields are positional,
# so their order below is part of the interface.
TaskData = collections.namedtuple(
    'TaskData',
    (
        # Command line (list of strings) to run; used regardless of what the
        # isolated file specifies.
        'command',
        # Directory, relative to the run directory, to start the command in.
        'relative_cwd',
        # Extra arguments (list of strings) appended to the command that the
        # isolated file specifies.
        'extra_args',
        # Hash of the .isolated file to fetch in order to recreate the tree of
        # files for the target executable. The command found in the .isolated
        # is run. Mutually exclusive with the command argument.
        'isolated_hash',
        # isolateserver.Storage instance used to retrieve remote objects. It
        # holds a reference to an isolateserver.StorageApi, which performs the
        # actual I/O.
        'storage',
        # isolateserver.LocalCache instance that caches retrieved objects so
        # they are not fetched over and over. May be on-disk or in-memory.
        'isolate_cache',
        # Paths, relative to root_dir, to include in the output isolated bundle
        # once the task completes (see link_outputs_to_outdir).
        'outputs',
        # Callable (run_dir) => context manager installing named caches into
        # |run_dir|.
        'install_named_caches',
        # When True, the temporary directory is intentionally left behind for
        # later inspection.
        'leak_temp_dir',
        # Directory under which the temporary directory is created. When not
        # specified, a random temporary directory is created.
        'root_dir',
        # The process is killed if it runs longer than this many seconds.
        'hard_timeout',
        # Seconds to wait between SIGTERM and SIGKILL.
        'grace_period',
        # Path to a file holding bot state; substituted for the
        # ${SWARMING_BOT_FILE} task command line argument.
        'bot_file',
        # Logical account to switch LUCI_CONTEXT into.
        'switch_to_account',
        # Context manager dir => CipdInfo, see install_client_and_packages.
        'install_packages_fn',
        # Whether to use the go isolated client.
        'use_go_isolated',
        # Cache directory for the go isolated client.
        'go_cache_dir',
        # Parameters handed to the go isolated client.
        'go_cache_policies',
        # Environment variables to set.
        'env',
        # Environment variables to mutate with relative directories.
        # Example: {"ENV_KEY": ['relative', 'paths', 'to', 'prepend']}
        'env_prefix',
        # Lowers the priority of the task process.
        'lower_priority',
        # subprocess42.Containment instance. May be None.
        'containment',
    ))
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -0500225
226
Marc-Antoine Ruelee6ca622017-11-29 11:19:16 -0500227def _to_str(s):
228 """Downgrades a unicode instance to str. Pass str through as-is."""
229 if isinstance(s, str):
230 return s
231 # This is technically incorrect, especially on Windows. In theory
232 # sys.getfilesystemencoding() should be used to use the right 'ANSI code
233 # page' on Windows, but that causes other problems, as the character set
234 # is very limited.
235 return s.encode('utf-8')
236
237
def _to_unicode(s):
  """Returns s as a text string, decoding it from UTF-8 when necessary.

  None and values that are already text are returned untouched.
  """
  already_text = isinstance(s, six.text_type)
  if already_text or s is None:
    return s
  decoded = s.decode('utf-8')
  return decoded
243
244
def make_temp_dir(prefix, root_dir):
  """Creates a fresh, uniquely named directory and returns its path as text.

  The directory name starts with |prefix| and is created inside |root_dir|.
  """
  new_dir = tempfile.mkdtemp(dir=root_dir, prefix=prefix)
  return six.text_type(new_dir)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000248
249
def change_tree_read_only(rootdir, read_only):
  """Applies the requested read-only policy to the tree under rootdir.

  Args:
    rootdir: root of the tree to update.
    read_only: 0 or None (everything writeable), 1 (files read only) or 2
        (files and directories read only).

  Raises:
    ValueError if read_only is none of the values above.
  """
  if read_only == 2:
    # Files and directories (except on Windows) become read only. Nothing can
    # be modified, created or deleted in the test directory, except on Windows
    # where creating and deleting files remains possible.
    file_path.make_tree_read_only(rootdir)
    return

  if read_only == 1:
    # Only files become read only, not directories. Files cannot be modified
    # but they can still be created or deleted.
    file_path.make_tree_files_read_only(rootdir)
    return

  if read_only not in (0, None):
    raise ValueError(
        'change_tree_read_only(%s, %s): Unknown flag %s' %
        (rootdir, read_only, read_only))

  # Anything can be modified.
  # TODO(maruel): This is currently dangerous as long as
  # DiskContentAddressedCache.touch() is not yet changed to verify the hash of
  # the content of the files it is looking at, so that if a test modifies an
  # input file, the file must be deleted.
  file_path.make_tree_writeable(rootdir)
276
277
@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
  """Context manager that makes |account| the task's LUCI_CONTEXT account.

  Nothing is changed in any of these cases:
  - |account| is None or ''. This is what happens when run_isolated.py is
    called without the '--switch-to-account' flag; a task running inside some
    LUCI_CONTEXT environment then simply inherits the account already set,
    e.g. when users invoke run_isolated.py explicitly from their code.
  - LUCI_CONTEXT-based auth is not in use.
  - |account| is already the default account.

  When |account| is not defined in the context, the default account is removed,
  which switches to non-authenticated access. This is the case for Swarming
  tasks that don't use 'task' service accounts.

  Args:
    account: logical account id to switch to; may be None or ''.
    tmp_dir: directory handed to luci_context.write() as _tmpdir.
  """
  # The context is only read when a switch was actually requested.
  local_auth = luci_context.read('local_auth') if account else None

  # See LUCI_CONTEXT.md for the format of 'local_auth'.
  if not local_auth or local_auth.get('default_account_id') == account:
    # Not switching, not using LUCI_CONTEXT auth, or already set.
    yield
    return

  known_ids = {entry['id'] for entry in local_auth.get('accounts') or []}
  if account not in known_ids:
    logging.warning(
        'Requested LUCI_CONTEXT account %r is not available (have only %r), '
        'disabling authentication', account, sorted(known_ids))
    local_auth.pop('default_account_id', None)
  else:
    logging.info('Switching default LUCI_CONTEXT account to %r', account)
    local_auth['default_account_id'] = account

  with luci_context.write(_tmpdir=tmp_dir, local_auth=local_auth):
    yield
324
325
def process_command(command, out_dir, bot_file):
  """Returns a copy of |command| with parameter tokens substituted.

  Each argument goes through replace_parameters().

  Raises:
    ValueError if a parameter is requested in |command| but its value is not
    provided.
  """
  processed = []
  for token in command:
    processed.append(replace_parameters(token, out_dir, bot_file))
  return processed
334
335
def replace_parameters(arg, out_dir, bot_file):
  """Substitutes the magic parameter tokens found in one string.

  Args:
    arg: string that may contain ${EXECUTABLE_SUFFIX}, ${ISOLATED_OUTDIR}
        and/or ${SWARMING_BOT_FILE}.
    out_dir: value for ${ISOLATED_OUTDIR}; must be set if that token is used.
    bot_file: value for ${SWARMING_BOT_FILE}; when unset the token is left
        in place and a warning is logged.

  Raises:
    ValueError if a parameter is requested in |arg| but its value is not
    provided.
  """
  arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
  path_substituted = False

  if ISOLATED_OUTDIR_PARAMETER in arg:
    if not out_dir:
      raise ValueError(
          'output directory is requested in command or env var, but not '
          'provided; please specify one')
    arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
    path_substituted = True

  if SWARMING_BOT_FILE_PARAMETER in arg:
    if not bot_file:
      logging.warning('SWARMING_BOT_FILE_PARAMETER found in command or env '
                      'var, but no bot_file specified. Leaving parameter '
                      'unchanged.')
    else:
      arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
      path_substituted = True

  if not path_substituted:
    return arg
  # Slashes are converted only when a path parameter was substituted, to
  # support arguments like '${ISOLATED_OUTDIR}/foo/bar'.
  return arg.replace('/', os.sep)
maruela9cfd6f2015-09-15 11:03:15 -0700365
366
Roberto Carrillo71ade6d2018-10-08 22:30:24 +0000367
def get_command_env(tmp_dir, cipd_info, run_dir, env, env_prefixes, out_dir,
                    bot_file):
  """Returns full OS environment to run a command in.

  Sets up TEMP, puts directory with cipd binary in front of PATH, exposes
  CIPD_CACHE_DIR env var, and installs all env_prefixes.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.
    run_dir: The root directory the isolated tree is mapped in.
    env: environment variables to use. A falsy value removes the variable.
    env_prefixes: {"ENV_KEY": ['cwd', 'relative', 'paths', 'to', 'prepend']}
    out_dir: Isolated output directory. Required to be != None if any of the
        env vars contain ISOLATED_OUTDIR_PARAMETER.
    bot_file: Required to be != None if any of the env vars contain
        SWARMING_BOT_FILE_PARAMETER.

  Returns:
    A new dict based on a copy of os.environ; os.environ itself is not
    modified.

  Raises:
    ValueError if an env value references ISOLATED_OUTDIR_PARAMETER while
    out_dir is not provided (raised by replace_parameters).
  """
  out = os.environ.copy()
  for k, v in env.items():
    if not v:
      out.pop(k, None)
    else:
      out[k] = replace_parameters(v, out_dir, bot_file)

  if cipd_info:
    bin_dir = _to_str(os.path.dirname(cipd_info.client.binary_path))
    # PATH may be absent: it can be missing from the parent environment or
    # have just been removed above through an empty value in |env|. In that
    # case use the cipd directory alone instead of failing with KeyError.
    cur_path = out.get('PATH')
    if cur_path:
      out['PATH'] = '%s%s%s' % (bin_dir, os.pathsep, cur_path)
    else:
      out['PATH'] = bin_dir
    out['CIPD_CACHE_DIR'] = _to_str(cipd_info.cache_dir)

  for key, paths in env_prefixes.items():
    assert isinstance(paths, list), paths
    paths = [os.path.normpath(os.path.join(run_dir, p)) for p in paths]
    # The prefixes go in front of any pre-existing value.
    cur = out.get(key)
    if cur:
      paths.append(cur)
    out[key] = _to_str(os.path.pathsep.join(paths))

  tmp_dir = _to_str(tmp_dir)
  # pylint: disable=line-too-long
  # * python respects $TMPDIR, $TEMP, and $TMP in this order, regardless of
  #   platform. So $TMPDIR must be set on all platforms.
  #   https://github.com/python/cpython/blob/2.7/Lib/tempfile.py#L155
  out['TMPDIR'] = tmp_dir
  if sys.platform == 'win32':
    # * chromium's base utils uses GetTempPath().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_win.cc?q=GetTempPath
    # * Go uses GetTempPath().
    # * GetTempPath() uses %TMP%, then %TEMP%, then other stuff. So %TMP% must
    #   be set.
    #   https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-gettemppathw
    out['TMP'] = tmp_dir
    # https://blogs.msdn.microsoft.com/oldnewthing/20150417-00/?p=44213
    out['TEMP'] = tmp_dir
  elif sys.platform == 'darwin':
    # * Chromium uses an hack on macOS before calling into
    #   NSTemporaryDirectory().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_mac.mm?q=GetTempDir
    #   https://developer.apple.com/documentation/foundation/1409211-nstemporarydirectory
    out['MAC_CHROMIUM_TMPDIR'] = tmp_dir
  else:
    # TMPDIR is specified as the POSIX standard envvar for the temp directory.
    # http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
    # * mktemp on linux respects $TMPDIR.
    # * Chromium respects $TMPDIR on linux.
    #   https://cs.chromium.org/chromium/src/base/files/file_util_posix.cc?q=GetTempDir
    # * Go uses $TMPDIR.
    #   https://go.googlesource.com/go/+/go1.10.3/src/os/file_unix.go#307
    pass
  return out
vadimsh232f5a82017-01-20 19:23:44 -0800438
439
def run_command(
    command, cwd, env, hard_timeout, grace_period, lower_priority, containment):
  """Runs the command.

  The child is started detached. On hard timeout, or when this process
  receives one of subprocess42.STOP_SIGNALS, the child is terminated, given
  grace_period to exit, and then killed.

  Args:
    command: command line to run, passed as-is to subprocess42.Popen.
    cwd: working directory for the child process.
    env: environment for the child process.
    hard_timeout: passed to the first proc.wait(); a falsy value is turned into
        None (presumably meaning no timeout; confirm against subprocess42).
    grace_period: passed to proc.wait() after the child was terminated; a falsy
        value is turned into None.
    lower_priority: forwarded to subprocess42.Popen.
    containment: forwarded to subprocess42.Popen.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
    The exit code is 1 when the process could not be run (OSError).
  """
  logging.info(
      'run_command(%s, %s, %s, %s, %s, %s)',
      command, cwd, hard_timeout, grace_period, lower_priority, containment)

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list so that handler() below can record the signal by mutating it.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def handler(signum, _frame):
        # Only the first signal received after the process was started is
        # acted upon. It is surfaced as TimeoutExpired so that it shares the
        # termination path of a hard timeout.
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(
          command, cwd=cwd, env=env, detached=True, close_fds=True,
          lower_priority=lower_priority, containment=containment)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          # Either a real timeout or the exception raised by handler(); only
          # the former is reported as a hard timeout.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
      logging.info('Waiting for process exit')
      exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist, a dependent library is missing or '
          'the command line is too long>\n'
          '<Check for missing .so/.dll in the .isolate or GN file or length of '
          'command line args>\n'
          '<Command: %s>\n' % command)
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  # The exit code is also logged as an unsigned 32-bit hexadecimal value.
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700516
517
def _fetch_and_map_with_go(isolated_hash, storage, outdir, go_cache_dir,
                           policies, isolated_client):
  """Fetches an isolated tree with the go client and maps it into outdir.

  Args:
    isolated_hash: hash of the .isolated file to download.
    storage: object whose server_ref provides the server url and namespace.
    outdir: directory the tree is created in.
    go_cache_dir: cache directory handed to the go client.
    policies: object providing max_items, max_cache_size and min_free_space.
    isolated_client: path to the go isolated client executable.

  Returns:
    tuple(bundle, stats) where stats has 'duration', 'items_cold' and
    'items_hot'.

  Raises:
    ValueError if the go client exits with a non-zero code.
  """
  start = time.time()
  server_ref = storage.server_ref
  # The go client reports its results through this temporary JSON file.
  fd, result_json_path = tempfile.mkstemp(
      prefix=u'fetch-and-map-result-', suffix=u'.json')
  os.close(fd)
  try:
    cmd = [isolated_client, 'download']
    cmd += ['-isolate-server', server_ref.url]
    cmd += ['-namespace', server_ref.namespace]
    cmd += ['-isolated', isolated_hash]
    # flags for cache
    cmd += ['-cache-dir', go_cache_dir]
    cmd += ['-cache-max-items', str(policies.max_items)]
    cmd += ['-cache-max-size', str(policies.max_cache_size)]
    cmd += ['-cache-min-free-space', str(policies.min_free_space)]
    # flags for output
    cmd += ['-output-dir', outdir]
    cmd += ['-fetch-and-map-result-json', result_json_path]
    proc = subprocess42.Popen(cmd)

    # Wait in 30 second slices, printing in between; this is to prevent I/O
    # timeout error during isolated setup.
    while True:
      try:
        retcode = proc.wait(30)
      except subprocess42.TimeoutExpired:
        print('still running isolated')
      else:
        if retcode != 0:
          raise ValueError("retcode of isolated command is not 0: %s" % retcode)
        break

    with open(result_json_path) as json_file:
      result_json = json.load(json_file)

    isolated = result_json['isolated']
    bundle = isolateserver.IsolatedBundle(filter_cb=None)
    # Only following properties are used in caller.
    bundle.command = isolated.get('command')
    bundle.read_only = isolated.get('read_only')
    bundle.relative_cwd = isolated.get('relative_cwd')

    stats = {
        'duration': time.time() - start,
        'items_cold': result_json['items_cold'],
        'items_hot': result_json['items_hot'],
    }
    return bundle, stats
  finally:
    fs.remove(result_json_path)
585
586
# TODO(crbug.com/932396): remove this function.
def fetch_and_map(isolated_hash, storage, cache, outdir):
  """Fetches an isolated tree into outdir and returns (bundle, stats).

  stats holds the fetch 'duration' plus 'items_cold' and 'items_hot', the
  latter two being base64 encoded large.pack() output.
  """
  start = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=False)
  # Hot items are the used ones minus the ones that were added, counting
  # duplicates.
  used_counts = collections.Counter(cache.used)
  added_counts = collections.Counter(cache.added)
  hot = (used_counts - added_counts).elements()
  stats = {'duration': time.time() - start}
  stats['items_cold'] = base64.b64encode(large.pack(sorted(cache.added)))
  stats['items_hot'] = base64.b64encode(large.pack(sorted(hot)))
  return bundle, stats
604
605
def link_outputs_to_outdir(run_dir, out_dir, outputs):
  """Makes every named output available under out_dir for upload.

  Does nothing when |outputs| is empty. Otherwise the needed directories are
  created in out_dir and each output is copied from run_dir with
  copy_recursively().

  Raises an error if the file already exists in that directory.
  """
  if outputs:
    isolateserver.create_directories(out_dir, outputs)
    for rel_path in outputs:
      source = os.path.join(run_dir, rel_path)
      destination = os.path.join(out_dir, rel_path)
      copy_recursively(source, destination)
616
617
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from src to dst.

  `src` may be a file, directory, or a symlink to a file or directory.
  All symlinks are replaced with their targets, so the resulting
  directory structure in dst will never have any symlinks.

  To increase speed, copy_recursively hardlinks individual files into the
  (newly created) directory structure if possible, unlike Python's
  shutil.copytree().

  Errors are logged and not raised: a missing path or a broken symlink is
  logged as a warning, other OSError as info. A chain of symlinks that is too
  long, which includes cyclic symlinks, is skipped with a warning.

  Args:
    src: path to copy from.
    dst: path to copy to.
  """
  orig_src = src
  # Upper bound on the number of symlinks followed for a single path, so that
  # cyclic symlinks cannot make the loop below spin forever. 40 is the limit
  # Linux applies during path resolution.
  max_symlink_hops = 40
  try:
    # Replace symlinks with their final target.
    hops = 0
    while fs.islink(src):
      if hops >= max_symlink_hops:
        logging.warning(
            'Too many levels of symbolic links while resolving %s; skipping',
            orig_src)
        return
      hops += 1
      res = fs.readlink(src)
      # A relative target is relative to the directory holding the link; an
      # absolute target replaces the path entirely.
      src = os.path.join(os.path.dirname(src), res)

    # A src that does not exist surfaces as an OSError from the fs calls
    # below. A warning will be logged in that case.
    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      return

    if not fs.exists(dst):
      os.makedirs(dst)

    for child in fs.listdir(src):
      copy_recursively(os.path.join(src, child), os.path.join(dst, child))

  except OSError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, orig_src)
    else:
      logging.info("Couldn't collect output file %s: %s", src, e)
aludwin0a8e17d2016-10-27 15:57:39 -0700655
656
def upload_then_delete(storage, out_dir, leak_temp_dir):
  """Uploads the content of out_dir back to storage, then deletes out_dir.

  The upload only happens when out_dir exists and is not empty. The deletion
  is skipped when leak_temp_dir is set.

  Arguments:
  - storage: object handed to isolateserver.archive_files_to_storage(); its
      server_ref.url and server_ref.namespace are reported in outputs_ref.
  - out_dir: directory holding the files to archive.
  - leak_temp_dir: if truthy, out_dir is left on disk.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
      server, if applicable. None when nothing was uploaded.
    - success: False if out_dir could not be removed (rmtree reported a failure
      or raised OSError), in which case the task must forcibly be considered a
      failure, e.g. zombie processes were left behind.
    - stats: uploading stats.

  Raises:
    isolateserver.Aborted: re-raised when the upload was interrupted.
  """
  # Upload out_dir and generate a .isolated file out of this directory. It is
  # only done if files were written in the directory.
  outputs_ref = None
  cold = []
  hot = []
  start = time.time()

  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
            # dict.values() is a view on python3 and cannot be indexed;
            # materialize it first so this works on both python2 and python3.
            'isolated': list(results.values())[0],
            'isolatedserver': storage.server_ref.url,
            'namespace': storage.server_ref.namespace,
        }
        cold = sorted(i.size for i in f_cold)
        hot = sorted(i.size for i in f_hot)
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading data.
        # There is 2 causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    if (not leak_temp_dir and fs.isdir(out_dir) and
        not file_path.rmtree(out_dir)):
      logging.error('Had difficulties removing out_dir %s', out_dir)
    else:
      success = True
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)
  stats = {
      'duration': time.time() - start,
      'items_cold': base64.b64encode(large.pack(cold)),
      'items_hot': base64.b64encode(large.pack(hot)),
  }
  return outputs_ref, success, stats
maruela9cfd6f2015-09-15 11:03:15 -0700715
716
def map_and_run(data, constant_run_path):
  """Runs a command with optional isolated input/output.

  Arguments:
  - data: TaskData instance.
  - constant_run_path: when truthy and a root directory is known, the run
      directory is the fixed path os.path.join(root_dir, ISOLATED_RUN_DIR),
      which is wiped and recreated; otherwise a fresh directory is obtained
      from make_temp_dir().

  Returns metadata about the result: a dict with the keys 'duration',
  'exit_code', 'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref'
  and 'version', plus 'cipd_pins' when CIPD packages were installed.
  """

  if data.isolate_cache:
    download_stats = {
        #'duration': 0.,
        'initial_number_items': len(data.isolate_cache),
        'initial_size': data.isolate_cache.total_size,
        #'items_cold': '<large.pack()>',
        #'items_hot': '<large.pack()>',
    }
  else:
    # TODO(tikuta): take stats from state.json in this case too.
    download_stats = {}

  # 'internal_failure' starts out set and is only cleared once the command was
  # actually run, so that dying half-way is reported as an internal failure.
  result = {
      'duration': None,
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'run_isolated did not complete properly',
      'stats': {
          #'cipd': {
          #  'duration': 0.,
          #  'get_client_duration': 0.,
          #},
          'isolated': {
              'download': download_stats,
              #'upload': {
              #  'duration': 0.,
              #  'items_cold': '<large.pack()>',
              #  'items_hot': '<large.pack()>',
              #},
          },
      },
      #'cipd_pins': {
      #  'packages': [
      #    {'package_name': ..., 'version': ..., 'path': ...},
      #    ...
      #  ],
      #  'client_package': {'package_name': ..., 'version': ...},
      #},
      'outputs_ref': None,
      'version': 5,
  }

  if data.root_dir:
    file_path.ensure_tree(data.root_dir, 0o700)
  elif data.use_go_isolated:
    data = data._replace(root_dir=os.path.dirname(data.go_cache_dir))
  elif data.isolate_cache.cache_dir:
    data = data._replace(
        root_dir=os.path.dirname(data.isolate_cache.cache_dir))
  # See comment for these constants.
  # If root_dir is not specified, it is not constant.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and data.root_dir:
    run_dir = os.path.join(data.root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir, 0o700)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, data.root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(
      ISOLATED_OUT_DIR, data.root_dir) if data.storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, data.root_dir)
  isolated_client_dir = make_temp_dir(ISOLATED_CLIENT_DIR, data.root_dir)
  cwd = run_dir
  if data.relative_cwd:
    cwd = os.path.normpath(os.path.join(cwd, data.relative_cwd))
  command = data.command
  try:
    with data.install_packages_fn(run_dir, isolated_client_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      if data.isolated_hash:
        isolated_stats = result['stats'].setdefault('isolated', {})
        if data.use_go_isolated:
          bundle, stats = _fetch_and_map_with_go(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              outdir=run_dir,
              go_cache_dir=data.go_cache_dir,
              policies=data.go_cache_policies,
              isolated_client=os.path.join(isolated_client_dir,
                                           'isolated' + cipd.EXECUTABLE_SUFFIX))
        else:
          bundle, stats = fetch_and_map(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              cache=data.isolate_cache,
              outdir=run_dir)
        isolated_stats['download'].update(stats)
        change_tree_read_only(run_dir, bundle.read_only)
        # Inject the command
        if not command and bundle.command:
          command = bundle.command + data.extra_args
          # Only set the relative directory if the isolated file specified a
          # command, and no raw command was specified.
          if bundle.relative_cwd:
            cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))

      # NOTE(review): the two early returns below leave
      # result['internal_failure'] at its initial non-empty value although
      # their comments call them task failures -- confirm this is intended.
      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        return result

      # cwd must be run_dir itself or a path below it. Comparing against
      # run_dir with a trailing separator is required: a bare prefix check
      # would also accept a sibling directory such as '<run_dir>x', reachable
      # with a relative cwd like '../<basename of run_dir>x'.
      if cwd != run_dir and not cwd.startswith(os.path.join(run_dir, '')):
        # Handle this as a task failure, not an internal failure. This is a
        # 'last chance' way to gate against directory escape.
        sys.stderr.write('<Relative CWD is outside of run directory!>\n')
        result['exit_code'] = 1
        return result

      if not os.path.isdir(cwd):
        # Accepts relative_cwd that does not exist.
        os.makedirs(cwd, 0o700)

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if data.storage and data.outputs:
        isolateserver.create_directories(run_dir, data.outputs)

      with data.install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(data.switch_to_account, tmp_dir):
            env = get_command_env(
                tmp_dir, cipd_info, run_dir, data.env, data.env_prefix, out_dir,
                data.bot_file)
            command = tools.find_executable(command, env)
            command = process_command(command, out_dir, data.bot_file)
            file_path.ensure_command_has_abs_path(command, cwd)

            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, env, data.hard_timeout, data.grace_period,
                data.lower_priority, data.containment)
        finally:
          # Recorded even when running the command raised.
          result['duration'] = max(time.time() - start, 0)

    # We successfully ran the command, set internal_failure back to
    # None (even if the command failed, it's not an internal error).
    result['internal_failure'] = None
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  finally:
    # Clean up
    try:
      # Try to link files to the output directory, if specified.
      if out_dir:
        link_outputs_to_outdir(run_dir, out_dir, data.outputs)

      success = False
      if data.leak_temp_dir:
        success = True
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks *.exe file). Examine out_dir only
        # after that call completes (since child processes may write to
        # out_dir too and we need to wait for them to finish).
        for directory in (run_dir, tmp_dir, isolated_client_dir):
          if not fs.isdir(directory):
            continue
          try:
            success = file_path.rmtree(directory)
          except OSError as e:
            logging.error('rmtree(%r) failed: %s', directory, e)
            success = False
          if not success:
            sys.stderr.write(
                OUTLIVING_ZOMBIE_MSG % (directory, data.grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            upload_then_delete(data.storage, out_dir, data.leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500929
930
def run_tha_test(data, result_json):
  """Runs the task through map_and_run() and reports its outcome.

  If isolated_hash is specified, downloads the dependencies in the cache,
  hardlinks them into a temporary directory and runs the command specified in
  the .isolated.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
  - data: TaskData instance.
  - result_json: File path to dump result metadata into. If set, the process
      exit code is always 0 unless an internal error occurred.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Leave a pessimistic result file behind right away, in case the process
    # gets killed before the real one can be written.
    placeholder = {
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': 'Was terminated before completion',
        'outputs_ref': None,
        'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  result = map_and_run(data, True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))
  # 1 when an internal failure was recorded, 0 otherwise.
  internal_failure_code = int(bool(result['internal_failure']))

  if result_json:
    # We've found tests to delete 'work' when quitting, causing an exception
    # here. Try to recreate the directory if necessary.
    file_path.ensure_tree(os.path.dirname(result_json))
    tools.write_json(result_json, result, dense=True)
    # With a result file, only an internal error yields a non-zero exit code.
    return internal_failure_code

  # No result file: marshall into old-style inline output.
  outputs_ref = result['outputs_ref']
  if outputs_ref:
    # pylint: disable=unsubscriptable-object
    inline_ref = {
        'hash': outputs_ref['isolated'],
        'namespace': outputs_ref['namespace'],
        'storage': outputs_ref['isolatedserver'],
    }
    sys.stdout.flush()
    print(
        '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
        tools.format_json(inline_ref, dense=True))
    sys.stdout.flush()
  return result['exit_code'] or internal_failure_code
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000987
988
# Value yielded by 'install_client_and_packages'.
CipdInfo = collections.namedtuple(
    'CipdInfo',
    (
        # cipd.CipdClient object
        'client',
        # absolute path to bot-global cipd tag and instance cache
        'cache_dir',
        # dict with stats to return to the server
        'stats',
        # dict with installed cipd pins to return to the server
        'pins',
    ))
996
997
@contextlib.contextmanager
def noop_install_packages(_run_dir, _isolated_dir):
  """Stand-in for 'install_client_and_packages' when cipd is disabled.

  Both arguments are ignored; the context manager yields None and installs
  nothing.
  """
  yield
1002
1003
def _install_packages(run_dir, cipd_cache_dir, client, packages):
  """Runs client.ensure() for packages and reports the resolved pins.

  Args:
    run_dir (str): root of installation.
    cipd_cache_dir (str): the directory to use for the cipd package cache.
    client (CipdClient): the cipd client to use
    packages: packages to install, list [(path, package_name, version), ...].

  Returns: list of pinned packages, in the same order as `packages`. Looks
  like [
    {
      'path': 'subdirectory',
      'package_name': 'resolved/package/name',
      'version': 'deadbeef...',
    },
    ...
  ]
  """
  # One slot per requested package; filled in once the pins are known.
  package_pins = [None] * len(packages)

  # Group the requests per install subdirectory, remembering the position each
  # one had in `packages`.
  requested = collections.defaultdict(list)
  for index, (path, name, version) in enumerate(packages):
    # cipd deals with 'root' as ''
    subdir = '' if path == '.' else path
    requested[subdir].append((name, version, index))

  ensure_spec = {}
  for subdir, entries in requested.items():
    ensure_spec[subdir] = [(name, version) for name, version, _ in entries]
  pins = client.ensure(run_dir, ensure_spec, cache_dir=cipd_cache_dir)

  for subdir, pin_list in sorted(pins.items()):
    entries = requested[subdir]
    # Pins are matched to requests of the same subdirectory by position.
    for position, (name, version) in enumerate(pin_list):
      package_pins[entries[position][2]] = {
          'package_name': name,
          # swarming deals with 'root' as '.'
          'path': subdir or '.',
          'version': version,
      }

  assert None not in package_pins, (packages, pins, package_pins)

  return package_pins
1055
1056
@contextlib.contextmanager
def install_client_and_packages(run_dir, packages, service_url,
                                client_package_name, client_version, cache_dir,
                                isolated_dir):
  """Bootstraps the CIPD client, then installs the requested CIPD packages.

  Yields a single CipdInfo object carrying the CipdClient, the cipd cache
  directory, timing stats and the pins.

  Pins and the CIPD client info are in the form of:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  (the CIPD client info is a single dictionary instead of a list)

  such that they correspond 1:1 to all input package arguments from the command
  line. These dictionaries make their way all the way back to swarming, where
  they become the arguments of CipdPackage.

  If 'packages' list is empty, will bootstrap CIPD client, but won't install
  any packages.

  The bootstrapped client (regardless whether 'packages' list is empty or not),
  will be made available to the task via $PATH.

  The isolated client package is always installed into |isolated_dir|.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
      "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    isolated_dir (str): where to download isolated client.
  """
  assert cache_dir

  start = time.time()

  cache_dir = os.path.abspath(cache_dir)
  # Tag and instance caches live in a sub-directory of the cipd cache.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')
  run_dir = os.path.abspath(run_dir)
  packages = packages or []

  get_client_start = time.time()
  with cipd.get_client(service_url, client_package_name, client_version,
                       cache_dir) as client:
    get_client_duration = time.time() - get_client_start

    if packages:
      package_pins = _install_packages(
          run_dir, cipd_cache_dir, client, packages)
    else:
      package_pins = []

    # Install isolated client to |isolated_dir|.
    isolated_package = [('', ISOLATED_PACKAGE, ISOLATED_REVISION)]
    _install_packages(isolated_dir, cipd_cache_dir, client, isolated_package)

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - start
    logging.info(
        'Installing CIPD client and packages took %d seconds', total_duration)

    stats = {
        'duration': total_duration,
        'get_client_duration': get_client_duration,
    }
    pins = {
        'client_package': {
            'package_name': client.package_name,
            'version': client.instance_id,
        },
        'packages': package_pins,
    }
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -07001139
1140
def create_option_parser():
  """Builds the command line parser for run_isolated.

  Returns:
    A logging_utils.OptionParserWithLogging carrying the general options, the
    'Data source', 'Named caches', 'Process containment' and 'Debugging'
    groups, plus the options contributed by isolateserver, cipd and auth.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)
  parser.add_option(
      '--clean', action='store_true',
      help='Cleans the cache, trimming it necessary and remove corrupted items '
           'and returns without executing anything; use with -v to know what '
           'was done')
  parser.add_option(
      '--json',
      help='dump output metadata to json file. When used, run_isolated returns '
           'non-zero only on internal failure')
  parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  parser.add_option(
      '--grace-period', type='float',
      help='Grace period between SIGTERM and SIGKILL')
  parser.add_option(
      '--raw-cmd', action='store_true',
      help='Ignore the isolated command, use the one supplied at the command '
           'line')
  parser.add_option(
      '--relative-cwd',
      help='Ignore the isolated \'relative_cwd\' and use this one instead; '
           'requires --raw-cmd')
  parser.add_option(
      '--env', default=[], action='append',
      help='Environment variables to set for the child process')
  parser.add_option(
      '--env-prefix', default=[], action='append',
      help='Specify a VAR=./path/fragment to put in the environment variable '
           'before executing the command. The path fragment must be relative '
           'to the isolated run directory, and must not contain a `..` token. '
           'The path will be made absolute and prepended to the indicated '
           '$VAR using the OS\'s path separator. Multiple items for the same '
           '$VAR will be prepended in order.')
  parser.add_option(
      '--bot-file',
      help='Path to a file describing the state of the host. The content is '
           'defined by on_before_task() in bot_config.')
  parser.add_option(
      '--switch-to-account',
      help='If given, switches LUCI_CONTEXT to given logical service account '
           '(e.g. "task" or "system") before launching the isolated process.')
  parser.add_option(
      '--output', action='append',
      help='Specifies an output to return. If no outputs are specified, all '
           'files located in $(ISOLATED_OUTDIR) will be returned; '
           'otherwise, outputs in both $(ISOLATED_OUTDIR) and those '
           'specified by --output option (there can be multiple) will be '
           'returned. Note that if a file in OUT_DIR has the same path '
           'as an --output option, the --output version will be returned.')
  parser.add_option(
      '-a', '--argsfile',
      # This is actually handled in parse_args; it's included here purely so it
      # can make it into the help text.
      help='Specify a file containing a JSON array of arguments to this '
           'script. If --argsfile is provided, no other argument may be '
           'provided on the command line.')

  group = optparse.OptionGroup(parser, 'Data source')
  group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(group)
  parser.add_option_group(group)

  isolateserver.add_cache_options(parser)

  cipd.add_cipd_options(parser)

  group = optparse.OptionGroup(parser, 'Named caches')
  group.add_option(
      '--named-cache',
      dest='named_caches',
      action='append',
      nargs=3,
      default=[],
      help='A named cache to request. Accepts 3 arguments: name, path, hint. '
           'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
           'path is a path relative to the run dir where the cache directory '
           'must be put to. '
           'This option can be specified more than once.')
  group.add_option(
      '--named-cache-root', default='named_caches',
      help='Cache root directory. Default=%default')
  parser.add_option_group(group)

  # These options are registered on the group, not on the parser directly, so
  # that --help lists them under 'Process containment'. Options added to a
  # group are parsed exactly like options added to the parser.
  group = optparse.OptionGroup(parser, 'Process containment')
  group.add_option(
      '--lower-priority', action='store_true',
      help='Lowers the child process priority')
  group.add_option(
      '--containment-type', choices=('NONE', 'AUTO', 'JOB_OBJECT'),
      default='NONE',
      help='Type of container to use')
  group.add_option(
      '--limit-processes', type='int', default=0,
      help='Maximum number of active processes in the containment')
  group.add_option(
      '--limit-total-committed-memory', type='int', default=0,
      help='Maximum sum of committed memory in the containment')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Debugging')
  group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
           'Default: %default')
  group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(group)

  auth.add_auth_options(parser)

  parser.set_defaults(cache='cache', cipd_cache='cipd_cache')
  return parser
1261
1262
def process_named_cache_options(parser, options, time_fn=None):
  """Validates named cache options and returns a NamedCache, or None.

  Arguments:
    parser: option parser; its error() method is called on invalid input.
    options: parsed options. Reads named_caches (an iterable of
        (name, path, hint) triples), named_cache_root and, when a cache root
        is set, min_free_space.
    time_fn: forwarded as-is to local_caching.NamedCache.

  Returns:
    A local_caching.NamedCache rooted at the absolute path of
    options.named_cache_root, or None when options.named_cache_root is empty.
  """
  if options.named_caches and not options.named_cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for name, path, hint in options.named_caches:
    if not CACHE_NAME_RE.match(name):
      parser.error(
          'cache name %r does not match %r' % (name, CACHE_NAME_RE.pattern))
    if not path:
      parser.error('cache path cannot be empty')
    try:
      # int() instead of long(): long does not exist on Python 3, and on
      # Python 2 int() transparently promotes large values to long.
      int(hint)
    except ValueError:
      parser.error('cache hint must be a number')
  if not options.named_cache_root:
    return None
  # Make these configurable later if there is use case but for now it's fairly
  # safe values.
  # In practice, a fair chunk of bots are already recycled on a daily schedule
  # so this code doesn't have any effect to them, unless they are preloaded
  # with a really old cache.
  policies = local_caching.CachePolicies(
      # 1TiB.
      max_cache_size=1024*1024*1024*1024,
      min_free_space=options.min_free_space,
      max_items=50,
      max_age_secs=MAX_AGE_SECS)
  root_dir = six.text_type(os.path.abspath(options.named_cache_root))
  return local_caching.NamedCache(root_dir, policies, time_fn=time_fn)
1292
1293
def parse_args(args):
  """Parses the command line, optionally loading it from an --argsfile.

  Arguments:
    args: list of command line arguments, without the program name.

  Returns:
    Tuple (parser, options, args): the parser built by create_option_parser()
    and the (options, remaining args) it parsed.

  When -a/--argsfile is given, the file is read as JSON and its content
  replaces args. Passing any other argument along with --argsfile is an error.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser
  # even though that parser will never actually get to parse it. This is
  # because --argsfile is exclusive with all other options and arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      # The trailing space matters: the two literals are concatenated.
      file_argparse.error('Can\'t specify --argsfile with '
                          'any other arguments (%s)' % nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        args = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # We don't error out here; the failure is reported and the normal parser
      # below still runs.
      # NOTE(review): on failure "args" keeps the value passed in (it is not
      # emptied), so the parser below sees the original --argsfile arguments.
      # Confirm this is the intended fallback.
      print('Couldn\'t read arguments: %s' % e, file=sys.stderr)

  # Even if we failed to read the args, just call the normal parser now since it
  # will print the correct help message.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  return (parser, options, args)
1320
1321
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001322def _calc_named_cache_hint(named_cache, named_caches):
1323 """Returns the expected size of the missing named caches."""
1324 present = named_cache.available
1325 size = 0
1326 for name, _, hint in named_caches:
1327 if name not in present:
1328 hint = long(hint)
1329 if hint > 0:
1330 size += hint
1331 return size
1332
1333
def main(args):
  """Parses the command line and either cleans the caches or runs the task.

  Arguments:
    args: list of command line arguments, without the program name.

  Returns:
    The process exit code: 0 after a --clean run, 1 when a cipd.Error,
    local_caching.NamedCacheError or local_caching.NoMoreSpace is raised,
    otherwise whatever run_tha_test() returns.
  """
  # Warning: when --argsfile is used, the strings are unicode instances, when
  # parsed normally, the strings are str instances.
  (parser, options, args) = parse_args(args)

  if not file_path.enable_symlink():
    logging.warning('Symlink support is not enabled')

  named_cache = process_named_cache_options(parser, options)
  # hint is 0 when none of the requested named caches is missing (or none was
  # requested).
  hint = _calc_named_cache_hint(named_cache, options.named_caches)
  if hint:
    # Increase the --min-free-space value by the hint, and recreate the
    # NamedCache instance so it gets the updated CachePolicy.
    options.min_free_space += hint
    named_cache = process_named_cache_options(parser, options)

  # TODO(crbug.com/932396): Remove this.
  use_go_isolated = (
      options.cipd_enabled and
      # TODO(crbug.com/1045281): windows other than win10 has flaky connection
      # issue.
      (sys.platform != 'win32' or platform.release() == '10'))

  # TODO(maruel): CIPD caches should be defined at an higher level here too, so
  # they can be cleaned the same way.
  if use_go_isolated:
    isolate_cache = None
  else:
    isolate_cache = isolateserver.process_cache_options(options, trim=False)

  caches = []
  if isolate_cache:
    caches.append(isolate_cache)
  if named_cache:
    caches.append(named_cache)
  root = caches[0].cache_dir if caches else six.text_type(os.getcwd())
  if options.clean:
    if options.isolated:
      parser.error('Can\'t use --isolated with --clean.')
    if options.isolate_server:
      parser.error('Can\'t use --isolate-server with --clean.')
    if options.json:
      parser.error('Can\'t use --json with --clean.')
    if options.named_caches:
      parser.error('Can\'t use --named-cache with --clean.')
    # Trim first, then clean.
    local_caching.trim_caches(
        caches,
        root,
        min_free_space=options.min_free_space,
        max_age_secs=MAX_AGE_SECS)
    for c in caches:
      c.cleanup()
    return 0

  # Trim must still be done for the following case:
  # - named-cache was used
  # - some entries, with a large hint, where missing
  # - --min-free-space was increased accordingly, thus trimming is needed
  # Otherwise, this will have no effect, as bot_main calls run_isolated with
  # --clean after each task.
  if hint:
    logging.info('Additional trimming of %d bytes', hint)
    local_caching.trim_caches(
        caches,
        root,
        min_free_space=options.min_free_space,
        max_age_secs=MAX_AGE_SECS)

  if not options.isolated and not args:
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = six.text_type(os.path.abspath(options.root_dir))
  if options.json:
    options.json = six.text_type(os.path.abspath(options.json))

  if any('=' not in i for i in options.env):
    parser.error(
        '--env required key=value form. value can be skipped to delete '
        'the variable')
  options.env = dict(i.split('=', 1) for i in options.env)

  # Maps each --env-prefix variable name to its list of relative paths, in the
  # order given on the command line.
  prefixes = {}
  cwd = os.path.realpath(os.getcwd())
  for item in options.env_prefix:
    if '=' not in item:
      parser.error(
          '--env-prefix %r is malformed, must be in the form `VAR=./path`'
          % item)
    key, opath = item.split('=', 1)
    if os.path.isabs(opath):
      parser.error('--env-prefix %r path is bad, must be relative.' % opath)
    opath = os.path.normpath(opath)
    # NOTE(review): a plain string prefix test also accepts sibling
    # directories sharing the prefix (e.g. cwd + 'x'); confirm whether a
    # path-separator aware check is wanted.
    if not os.path.realpath(os.path.join(cwd, opath)).startswith(cwd):
      parser.error(
          '--env-prefix %r path is bad, must be relative and not contain `..`.'
          % opath)
    prefixes.setdefault(key, []).append(opath)
  options.env_prefix = prefixes

  cipd.validate_cipd_options(parser, options)

  install_packages_fn = noop_install_packages
  if options.cipd_enabled:
    install_packages_fn = (
        lambda run_dir, isolated_dir: install_client_and_packages(
            run_dir, cipd.parse_package_args(options.cipd_packages),
            options.cipd_server, options.cipd_client_package,
            options.cipd_client_version, cache_dir=options.cipd_cache,
            isolated_dir=isolated_dir))

  @contextlib.contextmanager
  def install_named_caches(run_dir):
    # WARNING: this function depends on "options" variable defined in the outer
    # function.
    assert six.text_type(run_dir), repr(run_dir)
    assert os.path.isabs(run_dir), run_dir
    named_caches = [(os.path.join(run_dir, six.text_type(relpath)), name)
                    for name, relpath, _ in options.named_caches]
    for path, name in named_caches:
      named_cache.install(path, name)
    try:
      yield
    finally:
      # Uninstall each named cache, returning it to the cache pool. If an
      # uninstall fails for a given cache, it will remain in the task's
      # temporary space, get cleaned up by the Swarming bot, and be lost.
      #
      # If the Swarming bot cannot clean up the cache, it will handle it like
      # any other bot file that could not be removed.
      for path, name in reversed(named_caches):
        try:
          # uninstall() doesn't trim but does call save() implicitly. Trimming
          # *must* be done manually via periodic 'run_isolated.py --clean'.
          named_cache.uninstall(path, name)
        except local_caching.NamedCacheError:
          # Arguments follow the message order: cache name first, then path.
          logging.exception('Error while removing named cache %r at %r. '
                            'The cache will be lost.', name, path)

  extra_args = []
  command = []
  if options.raw_cmd:
    command = args
    if options.relative_cwd:
      a = os.path.normpath(os.path.abspath(options.relative_cwd))
      # NOTE(review): same plain string prefix test as for --env-prefix above;
      # it does not distinguish cwd from a sibling sharing its prefix.
      if not a.startswith(os.getcwd()):
        parser.error(
            '--relative-cwd must not try to escape the working directory')
  else:
    if options.relative_cwd:
      parser.error('--relative-cwd requires --raw-cmd')
    extra_args = args

  containment_type = subprocess42.Containment.NONE
  if options.containment_type == 'AUTO':
    containment_type = subprocess42.Containment.AUTO
  if options.containment_type == 'JOB_OBJECT':
    containment_type = subprocess42.Containment.JOB_OBJECT
  containment = subprocess42.Containment(
      containment_type=containment_type,
      limit_processes=options.limit_processes,
      limit_total_committed_memory=options.limit_total_committed_memory)

  data = TaskData(
      command=command,
      relative_cwd=options.relative_cwd,
      extra_args=extra_args,
      isolated_hash=options.isolated,
      storage=None,
      isolate_cache=isolate_cache,
      outputs=options.output,
      install_named_caches=install_named_caches,
      leak_temp_dir=options.leak_temp_dir,
      root_dir=_to_unicode(options.root_dir),
      hard_timeout=options.hard_timeout,
      grace_period=options.grace_period,
      bot_file=options.bot_file,
      switch_to_account=options.switch_to_account,
      install_packages_fn=install_packages_fn,
      use_go_isolated=use_go_isolated,
      go_cache_dir=options.cache,
      go_cache_policies=local_caching.CachePolicies(
          max_cache_size=options.max_cache_size,
          min_free_space=options.min_free_space,
          max_items=options.max_items,
          max_age_secs=None,
      ),
      env=options.env,
      env_prefix=options.env_prefix,
      lower_priority=bool(options.lower_priority),
      containment=containment)
  try:
    if options.isolate_server:
      server_ref = isolate_storage.ServerRef(
          options.isolate_server, options.namespace)
      storage = isolateserver.get_storage(server_ref)
      with storage:
        data = data._replace(storage=storage)
        # Hashing schemes used by |storage| and |isolate_cache| MUST match.
        assert storage.server_ref.hash_algo == server_ref.hash_algo
        return run_tha_test(data, options.json)
    return run_tha_test(data, options.json)
  except (
      cipd.Error,
      local_caching.NamedCacheError,
      local_caching.NoMoreSpace) as ex:
    # Built-in exceptions have no .message attribute on Python 3; fall back to
    # str(ex) when it is missing or empty.
    print(getattr(ex, 'message', None) or str(ex), file=sys.stderr)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001555
1556
if __name__ == '__main__':
  # Script entry point: set up the process, then exit with main()'s result.
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -05001561 sys.exit(main(sys.argv[1:]))