blob: ad9833cd6d382354304d11be8d7b37bbfd716822 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
run_isolated takes care of setting up a temporary environment, running a
command, and tearing it down.
nodir55be77b2016-05-03 09:39:57 -070010
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000011It handles downloading and uploading isolated files, mapping CIPD packages and
12reusing stateful named caches.
13
14The isolated files, CIPD packages and named caches are kept as a global LRU
15cache.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050016
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000017Any ${EXECUTABLE_SUFFIX} on the command line or the environment variables passed
18with the --env option will be replaced with ".exe" string on Windows and "" on
19other platforms.
nodirbe642ff2016-06-09 15:51:51 -070020
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000021Any ${ISOLATED_OUTDIR} on the command line or the environment variables passed
22with the --env option will be replaced by the location of a temporary directory
23upon execution of the command specified in the .isolated file. All content
24written to this directory will be uploaded upon termination and the .isolated
25file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070026
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000027Any ${SWARMING_BOT_FILE} on the command line or the environment variables passed
28with the --env option will be replaced by the value of the --bot-file parameter.
29This file is used by a swarming bot to communicate state of the host to tasks.
30It is written to by the swarming bot's on_before_task() hook in the swarming
31server's custom bot_config.py.
32
33See
34https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Magic-Values.md
35for all the variables.
36
37See
38https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/swarming_bot/config/bot_config.py
39for more information about bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040"""
41
Marc-Antoine Ruelf899c482019-10-10 23:32:06 +000042from __future__ import print_function
43
44__version__ = '1.0.1'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000045
aludwin7556e0c2016-10-26 08:46:10 -070046import argparse
maruel064c0a32016-04-05 11:47:15 -070047import base64
iannucci96fcccc2016-08-30 15:52:22 -070048import collections
vadimsh232f5a82017-01-20 19:23:44 -080049import contextlib
Sadaf Matinkhoo10743a62018-03-29 16:28:58 -040050import errno
aludwin7556e0c2016-10-26 08:46:10 -070051import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000052import logging
53import optparse
54import os
Takuto Ikuta5c59a842020-01-24 03:05:24 +000055import platform
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040056import re
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000057import sys
58import tempfile
maruel064c0a32016-04-05 11:47:15 -070059import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000060
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000061from utils import tools
62tools.force_local_third_party()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000063
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000064# third_party/
65from depot_tools import fix_encoding
Takuto Ikuta6e2ff962019-10-29 12:35:27 +000066import six
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000067
68# pylint: disable=ungrouped-imports
69import auth
70import cipd
71import isolate_storage
72import isolateserver
73import local_caching
74from libs import luci_context
Vadim Shtayura6b555c12014-07-23 16:22:18 -070075from utils import file_path
maruel12e30012015-10-09 11:55:35 -070076from utils import fs
maruel064c0a32016-04-05 11:47:15 -070077from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040078from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040079from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050080from utils import subprocess42
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000081
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000082
# Magic tokens that may appear in the task command line or in the values of
# environment variables; replace_parameters() substitutes them.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# File name of the log written by run_isolated itself.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# File name of the log used for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Temporary directories deliberately get very short names. Windows limits
# paths to a relatively short 260 characters (MAX_PATH), and it is easy to
# exceed that limit, e.g. with recursive dependency trees like npm packages.
#
# For the same reason, start the script with a `root_dir` that is as short as
# possible.
# - ir: isolated_run
# - io: isolated_out
# - it: isolated_tmp
# - ic: isolated_client
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
ISOLATED_CLIENT_DIR = u'ic'

# TODO(tikuta): take these parameter from luci-config?
# Update the tag with `./client/update_isolated.sh`, or pick a revision from
# https://ci.chromium.org/p/infra-internal/g/infra-packagers/console
ISOLATED_PACKAGE = 'infra/tools/luci/isolated/${platform}'
ISOLATED_REVISION = 'git_revision:be4040d5064cbac78820314bb1aac02f0c2a3c3e'

# Valid named cache names. Keep synced with task_request.py.
CACHE_NAME_RE = re.compile(r'^[a-z0-9_]{1,4096}$')
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400122
123
# Message for the case where the task left processes behind and a directory
# could not be deleted. It takes exactly two %s arguments, in this order: the
# directory that could not be deleted, and the number of seconds granted
# between the termination signal and the forced kill.
OUTLIVING_ZOMBIE_MSG = """\
*** Swarming tried multiple times to delete the %s directory and failed ***
*** Hard failing the task ***

Swarming detected that your testing script ran an executable, which may have
started a child executable, and the main script returned early, leaving the
children executables playing around unguided.

You don't want to leave children processes outliving the task on the Swarming
bot, do you? The Swarming bot doesn't.

How to fix?
- For any process that starts children processes, make sure all children
  processes terminated properly before each parent process exits. This is
  especially important in very deep process trees.
  - This must be done properly both in normal successful task and in case of
    task failure. Cleanup is very important.
- The Swarming bot sends a SIGTERM in case of timeout.
  - You have %s seconds to comply after the signal was sent to the process
    before the process is forcibly killed.
- To achieve not leaking children processes in case of signals on timeout, you
  MUST handle signals in each executable / python script and propagate them to
  children processes.
  - When your test script (python or binary) receives a signal (SIGTERM, or
    CTRL_BREAK_EVENT on Windows), send it to all children processes and wait
    for them to terminate before quitting.

See
https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#Graceful-termination_aka-the-SIGTERM-and-SIGKILL-dance
for more information.

*** May the SIGKILL force be with you ***
"""
157
158
# Hardcoded for now; it could be exposed as a flag once that has value.
# Three weeks, expressed in seconds.
MAX_AGE_SECS = 21 * 24 * 60 * 60
162
163
# Immutable bag of everything needed to run one task.
TaskData = collections.namedtuple(
    'TaskData',
    (
        # Command line to run, as a list of strings. Used regardless of what
        # the isolated file specifies.
        'command',
        # Directory, relative, in which the command is started.
        'relative_cwd',
        # List of strings appended to the command taken from the isolated
        # file.
        'extra_args',
        # Hash of the .isolated file to fetch in order to recreate the tree of
        # files needed by the target executable. The command found in the
        # .isolated is the one executed. Mutually exclusive with |command|.
        'isolated_hash',
        # isolateserver.Storage instance used to retrieve remote objects. It
        # holds a reference to an isolateserver.StorageApi, which performs the
        # actual I/O.
        'storage',
        # isolateserver.LocalCache instance that caches retrieved objects so
        # they are not fetched over and over. Can be on-disk or in-memory.
        'isolate_cache',
        # Paths, relative to root_dir, to add to the output isolated bundle
        # once the task completes (see link_outputs_to_outdir).
        'outputs',
        # Function (run_dir) => context manager installing named caches into
        # |run_dir|.
        'install_named_caches',
        # When True, the temporary directory is leaked on purpose so it can be
        # examined later.
        'leak_temp_dir',
        # Directory in which the temporary directory is created. When not
        # specified, a random temporary directory is created.
        'root_dir',
        # The process is killed if it runs longer than this many seconds.
        'hard_timeout',
        # Seconds to wait between SIGTERM and SIGKILL.
        'grace_period',
        # Path to a file holding bot state; substituted for
        # ${SWARMING_BOT_FILE} in the task command line.
        'bot_file',
        # Logical account LUCI_CONTEXT is switched to.
        'switch_to_account',
        # Context manager dir => CipdInfo, see install_client_and_packages.
        'install_packages_fn',
        # Whether to use the go isolated client.
        'use_go_isolated',
        # Cache directory for the go isolated client.
        'go_cache_dir',
        # Parameters passed to the go isolated client.
        'go_cache_policies',
        # Environment variables to set.
        'env',
        # Environment variables to mutate with relative directories.
        # Example: {"ENV_KEY": ['relative', 'paths', 'to', 'prepend']}
        'env_prefix',
        # Lowers the priority of the task process.
        'lower_priority',
        # subprocess42.Containment instance. Can be None.
        'containment',
    ))
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -0500226
227
Marc-Antoine Ruelee6ca622017-11-29 11:19:16 -0500228def _to_str(s):
229 """Downgrades a unicode instance to str. Pass str through as-is."""
230 if isinstance(s, str):
231 return s
232 # This is technically incorrect, especially on Windows. In theory
233 # sys.getfilesystemencoding() should be used to use the right 'ANSI code
234 # page' on Windows, but that causes other problems, as the character set
235 # is very limited.
236 return s.encode('utf-8')
237
238
def _to_unicode(s):
  """Returns |s| as unicode text, decoding it from UTF-8 when needed.

  None and values that already are six.text_type are returned untouched.
  """
  already_usable = s is None or isinstance(s, six.text_type)
  return s if already_usable else s.decode('utf-8')
244
245
def make_temp_dir(prefix, root_dir):
  """Creates a fresh uniquely named directory and returns its path as text.

  Args:
    prefix: prefix of the name of the created directory.
    root_dir: directory in which the new directory is created.
  """
  created = tempfile.mkdtemp(dir=root_dir, prefix=prefix)
  return six.text_type(created)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000249
250
def change_tree_read_only(rootdir, read_only):
  """Applies the |read_only| policy to every entry under |rootdir|.

  Args:
    rootdir: root of the tree to update.
    read_only: one of 0, 1, 2 or None; selects how much of the tree can be
        modified, created or deleted afterwards.

  Raises:
    ValueError if |read_only| is not one of the supported values.
  """
  if read_only not in (0, 1, 2, None):
    raise ValueError(
        'change_tree_read_only(%s, %s): Unknown flag %s' %
        (rootdir, read_only, read_only))

  if read_only == 2:
    # Both files and directories (except on Windows) become read only.
    # Modifying, creating or deleting files in the test directory is then
    # inhibited, except on Windows where creating and deleting files is still
    # possible.
    file_path.make_tree_read_only(rootdir)
    return

  if read_only == 1:
    # Only files become read only, directories are left alone. Files cannot be
    # modified, but creating or deleting files is still possible.
    file_path.make_tree_files_read_only(rootdir)
    return

  # 0 or None: anything can be modified.
  # TODO(maruel): This is currently dangerous as long as
  # DiskContentAddressedCache.touch() is not yet changed to verify the hash of
  # the content of the files it is looking at, so that if a test modifies an
  # input file, the file must be deleted.
  file_path.make_tree_writeable(rootdir)
277
278
@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
  """Context manager making |account| the default LUCI_CONTEXT account.

  Nothing is changed in any of these cases:
  - |account| is None or ''. This is what happens when run_isolated.py is
    called without the '--switch-to-account' flag: if it runs inside some
    LUCI_CONTEXT environment, the task simply inherits whatever account is
    already set. Users invoking run_isolated.py explicitly from their code may
    hit this.
  - LUCI_CONTEXT-based auth is not in use (no 'local_auth' section).
  - The requested account is already the default one.

  When the requested account is not defined in the context, the default
  account is removed, i.e. the task switches to non-authenticated access. This
  happens for Swarming tasks that don't use 'task' service accounts.

  Args:
    account: logical account name to switch to; may be None or ''.
    tmp_dir: directory handed to luci_context.write() as |_tmpdir|.
  """
  # 'local_auth' is only read when a switch was actually requested.
  local_auth = luci_context.read('local_auth') if account else None

  # See LUCI_CONTEXT.md for the format of 'local_auth'.
  if not local_auth or local_auth.get('default_account_id') == account:
    yield
    return

  known_ids = set(a['id'] for a in (local_auth.get('accounts') or []))
  if account not in known_ids:
    logging.warning(
        'Requested LUCI_CONTEXT account %r is not available (have only %r), '
        'disabling authentication', account, sorted(known_ids))
    local_auth.pop('default_account_id', None)
  else:
    logging.info('Switching default LUCI_CONTEXT account to %r', account)
    local_auth['default_account_id'] = account

  with luci_context.write(_tmpdir=tmp_dir, local_auth=local_auth):
    yield
325
326
def process_command(command, out_dir, bot_file):
  """Returns a copy of |command| with the magic parameters substituted.

  Every argument goes through replace_parameters().

  Raises:
    ValueError if a parameter is requested in |command| but its value is not
      provided.
  """
  processed = []
  for argument in command:
    processed.append(replace_parameters(argument, out_dir, bot_file))
  return processed
335
336
def replace_parameters(arg, out_dir, bot_file):
  """Substitutes the magic parameter tokens found in the string |arg|.

  Args:
    arg: string that may contain ${EXECUTABLE_SUFFIX}, ${ISOLATED_OUTDIR}
        and/or ${SWARMING_BOT_FILE}.
    out_dir: value substituted for ${ISOLATED_OUTDIR}.
    bot_file: value substituted for ${SWARMING_BOT_FILE}. When it is not set,
        the token is left as-is and a warning is logged.

  Returns:
    The string with the tokens replaced.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is requested in |arg| but |out_dir| is
      not provided.
  """
  result = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
  path_was_inserted = False

  if ISOLATED_OUTDIR_PARAMETER in result:
    if not out_dir:
      raise ValueError(
          'output directory is requested in command or env var, but not '
          'provided; please specify one')
    result = result.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
    path_was_inserted = True

  if SWARMING_BOT_FILE_PARAMETER in result:
    if not bot_file:
      logging.warning('SWARMING_BOT_FILE_PARAMETER found in command or env '
                      'var, but no bot_file specified. Leaving parameter '
                      'unchanged.')
    else:
      result = result.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
      path_was_inserted = True

  if not path_was_inserted:
    return result
  # Slashes are converted only when a path parameter was substituted, because
  # of arguments like '${ISOLATED_OUTDIR}/foo/bar'.
  return result.replace('/', os.sep)
maruela9cfd6f2015-09-15 11:03:15 -0700366
367
Roberto Carrillo71ade6d2018-10-08 22:30:24 +0000368
def get_command_env(tmp_dir, cipd_info, run_dir, env, env_prefixes, out_dir,
                    bot_file):
  """Returns full OS environment to run a command in.

  Starts from a copy of os.environ, applies |env|, puts the directory with the
  cipd binary in front of PATH, exposes the CIPD_CACHE_DIR env var, installs
  all |env_prefixes| and points the temp directory variables to |tmp_dir|.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.
    run_dir: The root directory the isolated tree is mapped in.
    env: environment variables to use. A variable with an empty value is
        removed from the resulting environment.
    env_prefixes: {"ENV_KEY": ['cwd', 'relative', 'paths', 'to', 'prepend']}
    out_dir: Isolated output directory. Required to be != None if any of the
        env vars contain ISOLATED_OUTDIR_PARAMETER.
    bot_file: Required to be != None if any of the env vars contain
        SWARMING_BOT_FILE_PARAMETER.

  Returns:
    dict mapping environment variable names to their values.

  Raises:
    ValueError (from replace_parameters) if a value in |env| requests the
      output directory but |out_dir| is not provided.
  """
  out = os.environ.copy()
  for k, v in env.items():
    if not v:
      out.pop(k, None)
    else:
      out[k] = replace_parameters(v, out_dir, bot_file)

  if cipd_info:
    bin_dir = os.path.dirname(cipd_info.client.binary_path)
    # PATH may be missing, either because it is unset in the parent
    # environment or because |env| removed it above. Handle it the same way as
    # the prefixes below instead of failing with a KeyError.
    path_entries = [_to_str(bin_dir)]
    current_path = out.get('PATH')
    if current_path:
      path_entries.append(current_path)
    out['PATH'] = os.pathsep.join(path_entries)
    out['CIPD_CACHE_DIR'] = _to_str(cipd_info.cache_dir)

  for key, paths in env_prefixes.items():
    assert isinstance(paths, list), paths
    paths = [os.path.normpath(os.path.join(run_dir, p)) for p in paths]
    cur = out.get(key)
    if cur:
      # The pre-existing value goes last so the prefixes take precedence.
      paths.append(cur)
    out[key] = _to_str(os.path.pathsep.join(paths))

  tmp_dir = _to_str(tmp_dir)
  # pylint: disable=line-too-long
  # * python respects $TMPDIR, $TEMP, and $TMP in this order, regardless of
  #   platform. So $TMPDIR must be set on all platforms.
  #   https://github.com/python/cpython/blob/2.7/Lib/tempfile.py#L155
  out['TMPDIR'] = tmp_dir
  if sys.platform == 'win32':
    # * chromium's base utils uses GetTempPath().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_win.cc?q=GetTempPath
    # * Go uses GetTempPath().
    # * GetTempDir() uses %TMP%, then %TEMP%, then other stuff. So %TMP% must
    #   be set.
    #   https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-gettemppathw
    out['TMP'] = tmp_dir
    # https://blogs.msdn.microsoft.com/oldnewthing/20150417-00/?p=44213
    out['TEMP'] = tmp_dir
  elif sys.platform == 'darwin':
    # * Chromium uses a hack on macOS before calling into
    #   NSTemporaryDirectory().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_mac.mm?q=GetTempDir
    #   https://developer.apple.com/documentation/foundation/1409211-nstemporarydirectory
    out['MAC_CHROMIUM_TMPDIR'] = tmp_dir
  else:
    # TMPDIR is specified as the POSIX standard envvar for the temp directory.
    # http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
    # * mktemp on linux respects $TMPDIR.
    # * Chromium respects $TMPDIR on linux.
    #   https://cs.chromium.org/chromium/src/base/files/file_util_posix.cc?q=GetTempDir
    # * Go uses $TMPDIR.
    #   https://go.googlesource.com/go/+/go1.10.3/src/os/file_unix.go#307
    pass
  return out
vadimsh232f5a82017-01-20 19:23:44 -0800439
440
def run_command(
    command, cwd, env, hard_timeout, grace_period, lower_priority, containment):
  """Runs the command, enforcing the hard timeout and the grace period.

  Args:
    command: command line to run, as a list of strings.
    cwd: directory the process is started in.
    env: environment of the process.
    hard_timeout: seconds after which the process is sent a termination
        request; a falsy value means no timeout.
    grace_period: seconds to wait after the termination request before the
        process is killed; a falsy value means waiting indefinitely.
    lower_priority: forwarded to subprocess42.Popen().
    containment: forwarded to subprocess42.Popen().

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info(
      'run_command(%s, %s, %s, %s, %s, %s)',
      command, cwd, hard_timeout, grace_period, lower_priority, containment)

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list is used as a mutable flag that the nested handler can update.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def on_stop_signal(signum, _frame):
        # Only the first signal received while the process exists is turned
        # into a TimeoutExpired; everything else is ignored.
        if not proc or had_signal:
          return
        logging.info('Received signal %d', signum)
        had_signal.append(True)
        raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(
          command, cwd=cwd, env=env, detached=True, close_fds=True,
          lower_priority=lower_priority, containment=containment)
      with subprocess42.set_signal_handler(
          subprocess42.STOP_SIGNALS, on_stop_signal):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          # Raised either by wait() itself (hard timeout) or by the signal
          # handler above.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Signals are ignored during the grace period, which is forcibly given
      # to the child process.
      if exit_code is None:
        def ignore_signal(*_):
          return None

        with subprocess42.set_signal_handler(
            subprocess42.STOP_SIGNALS, ignore_signal):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
      logging.info('Waiting for process exit')
      exit_code = proc.wait()
    except OSError as e:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist, a dependent library is missing or '
          'the command line is too long>\n'
          '<Check for missing .so/.dll in the .isolate or GN file or length of '
          'command line args>\n'
          '<Command: %s, Exception: %s>\n' % (command, e))
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700517
518
def _fetch_and_map_with_go(isolated_hash, storage, outdir, go_cache_dir,
                           policies, isolated_client):
  """Fetches an isolated tree with the go client and creates the tree.

  The go client is run as a subprocess; it reports its result through a
  temporary JSON file, which is always removed before returning.

  Args:
    isolated_hash: hash of the .isolated file to download.
    storage: object whose |server_ref| provides the |url| and |namespace| of
        the isolate server.
    outdir: directory the tree is created in.
    go_cache_dir: cache directory handed to the go client.
    policies: object providing |max_items|, |max_cache_size| and
        |min_free_space|, passed as cache flags to the go client.
    isolated_client: path to the go isolated client executable.

  Returns:
    tuple(bundle, stats) where |bundle| is an isolateserver.IsolatedBundle with
    only |command|, |read_only| and |relative_cwd| set, and |stats| is a dict
    with the keys 'duration', 'items_cold' and 'items_hot'.

  Raises:
    ValueError if the go client exits with a non-zero code or does not
      complete within max_checks * check_period_sec seconds.
  """
  start = time.time()
  server_ref = storage.server_ref
  result_json_handle, result_json_path = tempfile.mkstemp(
      prefix=u'fetch-and-map-result-', suffix=u'.json')
  # Only the path is needed; the go client writes the file itself.
  os.close(result_json_handle)
  try:
    cmd = [
        isolated_client,
        'download',
        '-isolate-server',
        server_ref.url,
        '-namespace',
        server_ref.namespace,
        '-isolated',
        isolated_hash,

        # flags for cache
        '-cache-dir',
        go_cache_dir,
        '-cache-max-items',
        str(policies.max_items),
        '-cache-max-size',
        str(policies.max_cache_size),
        '-cache-min-free-space',
        str(policies.min_free_space),

        # flags for output
        '-output-dir',
        outdir,
        '-fetch-and-map-result-json',
        result_json_path,
    ]
    proc = subprocess42.Popen(cmd)
    cmd_str = ' '.join(cmd)

    exceeded_max_timeout = True
    check_period_sec = 30
    max_checks = 100
    # max timeout = max_checks * check_period_sec = 50 minutes
    for i in range(max_checks):
      # Printing periodically is to prevent I/O timeout error during isolated
      # setup.
      try:
        retcode = proc.wait(check_period_sec)
        if retcode != 0:
          raise ValueError("retcode is not 0: %s (cmd=%s)" % (retcode, cmd_str))
        exceeded_max_timeout = False
        break
      except subprocess42.TimeoutExpired:
        print('still running isolated (after %d seconds)' % (
            (i + 1) * check_period_sec))

    if exceeded_max_timeout:
      # Ask politely first, then kill if the client does not exit in time.
      proc.terminate()
      try:
        proc.wait(check_period_sec)
      except subprocess42.TimeoutExpired:
        proc.kill()
      proc.wait()
      # Raise unconditionally, because |proc| was forcefully terminated.
      raise ValueError("timedout after %d seconds (cmd=%s)" %
                       (check_period_sec * max_checks, cmd_str))

    with open(result_json_path) as json_file:
      result_json = json.load(json_file)

    isolated = result_json['isolated']
    bundle = isolateserver.IsolatedBundle(filter_cb=None)
    # Only following properties are used in caller.
    bundle.command = isolated.get('command')
    bundle.read_only = isolated.get('read_only')
    bundle.relative_cwd = isolated.get('relative_cwd')

    return bundle, {
        'duration': time.time() - start,
        'items_cold': result_json['items_cold'],
        'items_hot': result_json['items_hot'],
    }
  finally:
    fs.remove(result_json_path)
604
605
606# TODO(crbug.com/932396): remove this function.
def fetch_and_map(isolated_hash, storage, cache, outdir):
  """Fetches an isolated tree, creates it in |outdir| and reports stats.

  Returns:
    tuple(bundle, stats) where |bundle| is what isolateserver.fetch_isolated()
    returned and |stats| is a dict with the keys 'duration', 'items_cold' and
    'items_hot'.
  """
  started = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=False)
  # Hot items are the used ones minus the ones that had to be added, computed
  # as a multiset difference.
  used_counts = collections.Counter(cache.used)
  added_counts = collections.Counter(cache.added)
  hot = (used_counts - added_counts).elements()
  stats = {
      'duration': time.time() - started,
      'items_cold': base64.b64encode(large.pack(sorted(cache.added))),
      'items_hot': base64.b64encode(large.pack(sorted(hot))),
  }
  return bundle, stats
623
624
aludwin0a8e17d2016-10-27 15:57:39 -0700625def link_outputs_to_outdir(run_dir, out_dir, outputs):
626 """Links any named outputs to out_dir so they can be uploaded.
627
628 Raises an error if the file already exists in that directory.
629 """
630 if not outputs:
631 return
632 isolateserver.create_directories(out_dir, outputs)
633 for o in outputs:
Sadaf Matinkhoo10743a62018-03-29 16:28:58 -0400634 copy_recursively(os.path.join(run_dir, o), os.path.join(out_dir, o))
635
636
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from |src| to |dst|.

  |src| may be a file, a directory, or a symlink to a file or directory.
  Symlinks are replaced with their targets, so the resulting structure under
  |dst| never contains symlinks.

  To increase speed, individual files are hardlinked (with fallback) into the
  newly created directory structure if possible, unlike Python's
  shutil.copytree().

  OSError is not propagated: it is logged and the entry is skipped.
  """
  requested_src = src
  try:
    # Follow symlinks until their final target.
    while fs.islink(src):
      link_target = fs.readlink(src)
      src = os.path.join(os.path.dirname(src), link_target)
      # TODO(sadafm): Explicitly handle cyclic symlinks.

    # The original note here says fs.isfile (a wrapper around os.path.isfile)
    # throws an exception if src does not exist, in which case a warning is
    # logged below.
    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
    else:
      if not fs.exists(dst):
        os.makedirs(dst)
      for entry in fs.listdir(src):
        copy_recursively(os.path.join(src, entry), os.path.join(dst, entry))

  except OSError as e:
    if e.errno != errno.ENOENT:
      logging.info("Couldn't collect output file %s: %s", src, e)
    else:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, requested_src)
aludwin0a8e17d2016-10-27 15:57:39 -0700674
675
def upload_then_delete(storage, out_dir, leak_temp_dir):
  """Uploads the content of out_dir back to storage, then deletes out_dir.

  Arguments:
  - storage: storage object handed to isolateserver.archive_files_to_storage();
    its server_ref.url and server_ref.namespace are reported in outputs_ref.
  - out_dir: directory to archive. Nothing is uploaded if it does not exist or
    is empty.
  - leak_temp_dir: if true, out_dir is left on disk after the upload.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
        server, if applicable. None if nothing was uploaded.
    - success: False if something occurred that means that the task must
        forcibly be considered a failure, e.g. zombie processes were left
        behind.
    - stats: uploading stats.

  Raises:
    isolateserver.Aborted: the upload was interrupted. It is re-raised on
      purpose so the caller reports an internal failure.
  """
  # Upload out_dir and generate a .isolated file out of this directory. It is
  # only done if files were written in the directory.
  outputs_ref = None
  cold = []
  hot = []
  start = time.time()

  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None, verify_push=True)
        outputs_ref = {
            # dict.values() is a non-subscriptable view on Python 3, hence
            # the explicit list().
            'isolated': list(results.values())[0],
            'isolatedserver': storage.server_ref.url,
            'namespace': storage.server_ref.namespace,
        }
        cold = sorted(i.size for i in f_cold)
        hot = sorted(i.size for i in f_hot)
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading data.
        # There is 2 causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    if (not leak_temp_dir and fs.isdir(out_dir) and
        not file_path.rmtree(out_dir)):
      logging.error('Had difficulties removing out_dir %s', out_dir)
    else:
      # Either the directory was deliberately kept, was already gone, or was
      # removed cleanly.
      success = True
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)
  stats = {
      'duration': time.time() - start,
      'items_cold': base64.b64encode(large.pack(cold)),
      'items_hot': base64.b64encode(large.pack(hot)),
  }
  return outputs_ref, success, stats
maruela9cfd6f2015-09-15 11:03:15 -0700734
735
def map_and_run(data, constant_run_path):
  """Runs a command with optional isolated input/output.

  Sets up the run, output, temporary and isolated client directories, installs
  CIPD packages, optionally fetches the isolated tree, runs the command, then
  collects the outputs and removes the directories.

  Arguments:
  - data: TaskData instance.
  - constant_run_path: if true and a root directory is known (data.root_dir,
    or the parent of the cache directory), the run directory is always
    <root_dir>/ISOLATED_RUN_DIR and is wiped then recreated; otherwise a
    directory obtained from make_temp_dir() is used.

  Returns metadata about the result: a dict with keys 'duration', 'exit_code',
  'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref', 'version' and,
  when CIPD was used, 'cipd_pins'. 'internal_failure' is None unless
  run_isolated itself failed.
  """

  if data.isolate_cache:
    download_stats = {
        #'duration': 0.,
        'initial_number_items': len(data.isolate_cache),
        'initial_size': data.isolate_cache.total_size,
        #'items_cold': '<large.pack()>',
        #'items_hot': '<large.pack()>',
    }
  else:
    # TODO(tikuta): take stats from state.json in this case too.
    download_stats = {}

  # 'internal_failure' starts out set so that any early exit through an
  # exception is reported as an internal failure; it is only reset to None once
  # the command ran. Commented out keys are filled in later when applicable.
  result = {
      'duration': None,
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'run_isolated did not complete properly',
      'stats': {
          #'cipd': {
          #  'duration': 0.,
          #  'get_client_duration': 0.,
          #},
          'isolated': {
              'download': download_stats,
              #'upload': {
              #  'duration': 0.,
              #  'items_cold': '<large.pack()>',
              #  'items_hot': '<large.pack()>',
              #},
          },
      },
      #'cipd_pins': {
      #  'packages': [
      #    {'package_name': ..., 'version': ..., 'path': ...},
      #    ...
      #  ],
      #  'client_package': {'package_name': ..., 'version': ...},
      #},
      'outputs_ref': None,
      'version': 5,
  }

  # When no root directory was given, fall back on the parent directory of the
  # cache in use.
  if data.root_dir:
    file_path.ensure_tree(data.root_dir, 0o700)
  elif data.use_go_isolated:
    data = data._replace(root_dir=os.path.dirname(data.go_cache_dir))
  elif data.isolate_cache.cache_dir:
    data = data._replace(
        root_dir=os.path.dirname(data.isolate_cache.cache_dir))
  # See comment for these constants.
  # If root_dir is not specified, it is not constant.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and data.root_dir:
    run_dir = os.path.join(data.root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir, 0o700)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, data.root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(
      ISOLATED_OUT_DIR, data.root_dir) if data.storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, data.root_dir)
  isolated_client_dir = make_temp_dir(ISOLATED_CLIENT_DIR, data.root_dir)
  cwd = run_dir
  if data.relative_cwd:
    cwd = os.path.normpath(os.path.join(cwd, data.relative_cwd))
  command = data.command
  try:
    with data.install_packages_fn(run_dir, isolated_client_dir) as cipd_info:
      # cipd_info is None when CIPD is disabled.
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      if data.isolated_hash:
        isolated_stats = result['stats'].setdefault('isolated', {})
        if data.use_go_isolated:
          bundle, stats = _fetch_and_map_with_go(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              outdir=run_dir,
              go_cache_dir=data.go_cache_dir,
              policies=data.go_cache_policies,
              isolated_client=os.path.join(isolated_client_dir,
                                           'isolated' + cipd.EXECUTABLE_SUFFIX))
        else:
          bundle, stats = fetch_and_map(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              cache=data.isolate_cache,
              outdir=run_dir)
        isolated_stats['download'].update(stats)
        change_tree_read_only(run_dir, bundle.read_only)
        # Inject the command
        if not command and bundle.command:
          command = bundle.command + data.extra_args
          # Only set the relative directory if the isolated file specified a
          # command, and no raw command was specified.
          if bundle.relative_cwd:
            cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        return result

      # NOTE(review): this is a plain string prefix check, so a sibling
      # directory whose name merely starts with run_dir's name would pass it.
      if not cwd.startswith(run_dir):
        # Handle this as a task failure, not an internal failure. This is a
        # 'last chance' way to gate against directory escape.
        sys.stderr.write('<Relative CWD is outside of run directory!>\n')
        result['exit_code'] = 1
        return result

      if not os.path.isdir(cwd):
        # Accepts relative_cwd that does not exist.
        os.makedirs(cwd, 0o700)

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if data.storage and data.outputs:
        isolateserver.create_directories(run_dir, data.outputs)

      with data.install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(data.switch_to_account, tmp_dir):
            env = get_command_env(
                tmp_dir, cipd_info, run_dir, data.env, data.env_prefix, out_dir,
                data.bot_file)
            command = tools.find_executable(command, env)
            command = process_command(command, out_dir, data.bot_file)
            file_path.ensure_command_has_abs_path(command, cwd)

            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, env, data.hard_timeout, data.grace_period,
                data.lower_priority, data.containment)
        finally:
          # Recorded even if running the command raised; clamped so it is never
          # negative.
          result['duration'] = max(time.time() - start, 0)

      # We successfully ran the command, set internal_failure back to
      # None (even if the command failed, it's not an internal error).
      result['internal_failure'] = None
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  # Clean up
  finally:
    try:
      # Try to link files to the output directory, if specified.
      if out_dir:
        link_outputs_to_outdir(run_dir, out_dir, data.outputs)

      success = False
      if data.leak_temp_dir:
        success = True
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks *.exe file). Examine out_dir only after
        # that call completes (since child processes may write to out_dir too
        # and we need to wait for them to finish).
        for directory in (run_dir, tmp_dir, isolated_client_dir):
          if not fs.isdir(directory):
            continue
          try:
            success = file_path.rmtree(directory)
          except OSError as e:
            logging.error('rmtree(%r) failed: %s', directory, e)
            success = False
          if not success:
            sys.stderr.write(
                OUTLIVING_ZOMBIE_MSG % (directory, data.grace_period))
            # A directory that cannot be removed turns an otherwise
            # successful task into a failure.
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            upload_then_delete(data.storage, out_dir, data.leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500948
949
def run_tha_test(data, result_json):
  """Runs the task described by data and reports how it went.

  The heavy lifting is delegated to map_and_run(); this function only takes
  care of reporting. With result_json, the result metadata is dumped to that
  file. Without it, a reference to the uploaded outputs, if any, is printed to
  stdout between [run_isolated_out_hack] markers.

  Arguments:
  - data: TaskData instance.
  - result_json: File path to dump result metadata into. If set, the process
    exit code is always 0 unless an internal error occurred.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Leave a placeholder on disk immediately, in case the process is killed
    # before the real result can be written.
    tools.write_json(
        result_json,
        {
            'exit_code': None,
            'had_hard_timeout': False,
            'internal_failure': 'Was terminated before completion',
            'outputs_ref': None,
            'version': 5,
        },
        dense=True)

  result = map_and_run(data, True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))
  internal_failure_code = int(bool(result['internal_failure']))

  if not result_json:
    # Marshall into old-style inline output.
    outputs_ref = result['outputs_ref']
    if outputs_ref:
      # pylint: disable=unsubscriptable-object
      legacy = {
          'hash': outputs_ref['isolated'],
          'namespace': outputs_ref['namespace'],
          'storage': outputs_ref['isolatedserver'],
      }
      sys.stdout.flush()
      print(
          '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
          tools.format_json(legacy, dense=True))
      sys.stdout.flush()
    return result['exit_code'] or internal_failure_code

  # Some tasks were found to delete the directory holding result_json when
  # quitting; recreate it if needed so the write below does not fail.
  file_path.ensure_tree(os.path.dirname(result_json))
  tools.write_json(result_json, result, dense=True)
  # Only return 1 if there was an internal error.
  return internal_failure_code
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001006
1007
# Value yielded by the 'install_client_and_packages' context manager.
CipdInfo = collections.namedtuple(
    'CipdInfo',
    (
        # cipd.CipdClient object.
        'client',
        # Absolute path to bot-global cipd tag and instance cache.
        'cache_dir',
        # Dict with stats to return to the server.
        'stats',
        # Dict with installed cipd pins to return to the server.
        'pins',
    ))
1015
1016
@contextlib.contextmanager
def noop_install_packages(_run_dir, _isolated_dir):
  """Stand-in for 'install_client_and_packages' when cipd is disabled.

  Both arguments are ignored and nothing is installed; the context yields
  None in place of a CipdInfo.
  """
  yield
1021
1022
Takuto Ikuta2efc7792019-11-27 14:33:34 +00001023def _install_packages(run_dir, cipd_cache_dir, client, packages):
iannuccib58d10d2017-03-18 02:00:25 -07001024 """Calls 'cipd ensure' for packages.
1025
1026 Args:
1027 run_dir (str): root of installation.
1028 cipd_cache_dir (str): the directory to use for the cipd package cache.
1029 client (CipdClient): the cipd client to use
1030 packages: packages to install, list [(path, package_name, version), ...].
iannuccib58d10d2017-03-18 02:00:25 -07001031
1032 Returns: list of pinned packages. Looks like [
1033 {
1034 'path': 'subdirectory',
1035 'package_name': 'resolved/package/name',
1036 'version': 'deadbeef...',
1037 },
1038 ...
1039 ]
1040 """
1041 package_pins = [None]*len(packages)
1042 def insert_pin(path, name, version, idx):
1043 package_pins[idx] = {
1044 'package_name': name,
1045 # swarming deals with 'root' as '.'
1046 'path': path or '.',
1047 'version': version,
1048 }
1049
1050 by_path = collections.defaultdict(list)
1051 for i, (path, name, version) in enumerate(packages):
1052 # cipd deals with 'root' as ''
1053 if path == '.':
1054 path = ''
1055 by_path[path].append((name, version, i))
1056
1057 pins = client.ensure(
Takuto Ikuta2efc7792019-11-27 14:33:34 +00001058 run_dir,
1059 {
1060 subdir: [(name, vers) for name, vers, _ in pkgs
1061 ] for subdir, pkgs in by_path.items()
1062 },
1063 cache_dir=cipd_cache_dir,
iannuccib58d10d2017-03-18 02:00:25 -07001064 )
1065
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001066 for subdir, pin_list in sorted(pins.items()):
iannuccib58d10d2017-03-18 02:00:25 -07001067 this_subdir = by_path[subdir]
1068 for i, (name, version) in enumerate(pin_list):
1069 insert_pin(subdir, name, version, this_subdir[i][2])
1070
Robert Iannucci461b30d2017-12-13 11:34:03 -08001071 assert None not in package_pins, (packages, pins, package_pins)
iannuccib58d10d2017-03-18 02:00:25 -07001072
1073 return package_pins
1074
1075
@contextlib.contextmanager
def install_client_and_packages(run_dir, packages, service_url,
                                client_package_name, client_version, cache_dir,
                                isolated_dir):
  """Bootstraps the CIPD client, then installs the requested CIPD packages.

  Yields a single CipdInfo object carrying the CipdClient, the cipd cache
  directory, the stats and the pins.

  The pins are made of the CIPD client info, a single dictionary:
    {"package_name": package_name, "version": version}
  and of the package pins, a list of dictionaries:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  with one entry per item of 'packages', in the same order. These dictionaries
  make their way all the way back to swarming, where they become the arguments
  of CipdPackage.

  If 'packages' list is empty, the CIPD client is still bootstrapped but only
  the isolated client gets installed, into 'isolated_dir'.

  The bootstrapped client (regardless whether 'packages' list is empty or not),
  will be made available to the task via $PATH.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
      "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    isolated_dir (str): where to download isolated client.
  """
  assert cache_dir

  began = time.time()

  cache_dir = os.path.abspath(cache_dir)
  run_dir = os.path.abspath(run_dir)
  # Tag and instance caches.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')

  client_began = time.time()
  with cipd.get_client(service_url, client_package_name, client_version,
                       cache_dir) as client:
    get_client_duration = time.time() - client_began

    if packages:
      package_pins = _install_packages(run_dir, cipd_cache_dir, client,
                                       packages)
    else:
      package_pins = []

    # The isolated client is always installed, into its own directory.
    isolated_package = ('', ISOLATED_PACKAGE, ISOLATED_REVISION)
    _install_packages(isolated_dir, cipd_cache_dir, client, [isolated_package])

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - began
    logging.info(
        'Installing CIPD client and packages took %d seconds', total_duration)

    stats = {
        'duration': total_duration,
        'get_client_duration': get_client_duration,
    }
    pins = {
        'client_package': {
            'package_name': client.package_name,
            'version': client.instance_id,
        },
        'packages': package_pins,
    }
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -07001158
1159
1160def create_option_parser():
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -04001161 parser = logging_utils.OptionParserWithLogging(
nodir55be77b2016-05-03 09:39:57 -07001162 usage='%prog <options> [command to run or extra args]',
maruel@chromium.orgdedbf492013-09-12 20:42:11 +00001163 version=__version__,
1164 log_file=RUN_ISOLATED_LOG_FILE)
maruela9cfd6f2015-09-15 11:03:15 -07001165 parser.add_option(
maruel36a963d2016-04-08 17:15:49 -07001166 '--clean', action='store_true',
1167 help='Cleans the cache, trimming it necessary and remove corrupted items '
1168 'and returns without executing anything; use with -v to know what '
1169 'was done')
1170 parser.add_option(
maruela9cfd6f2015-09-15 11:03:15 -07001171 '--json',
1172 help='dump output metadata to json file. When used, run_isolated returns '
1173 'non-zero only on internal failure')
maruel6be7f9e2015-10-01 12:25:30 -07001174 parser.add_option(
maruel5c9e47b2015-12-18 13:02:30 -08001175 '--hard-timeout', type='float', help='Enforce hard timeout in execution')
maruel6be7f9e2015-10-01 12:25:30 -07001176 parser.add_option(
maruel5c9e47b2015-12-18 13:02:30 -08001177 '--grace-period', type='float',
maruel6be7f9e2015-10-01 12:25:30 -07001178 help='Grace period between SIGTERM and SIGKILL')
bpastene3ae09522016-06-10 17:12:59 -07001179 parser.add_option(
Marc-Antoine Ruel49e347d2017-10-24 16:52:02 -07001180 '--raw-cmd', action='store_true',
1181 help='Ignore the isolated command, use the one supplied at the command '
1182 'line')
1183 parser.add_option(
Marc-Antoine Ruel95068cf2017-12-07 21:35:05 -05001184 '--relative-cwd',
1185 help='Ignore the isolated \'relative_cwd\' and use this one instead; '
1186 'requires --raw-cmd')
1187 parser.add_option(
Marc-Antoine Ruel19dd8872017-11-28 18:33:39 -05001188 '--env', default=[], action='append',
1189 help='Environment variables to set for the child process')
1190 parser.add_option(
1191 '--env-prefix', default=[], action='append',
Robert Iannuccibf5f84c2017-11-22 12:56:50 -08001192 help='Specify a VAR=./path/fragment to put in the environment variable '
1193 'before executing the command. The path fragment must be relative '
1194 'to the isolated run directory, and must not contain a `..` token. '
1195 'The path will be made absolute and prepended to the indicated '
1196 '$VAR using the OS\'s path separator. Multiple items for the same '
1197 '$VAR will be prepended in order.')
1198 parser.add_option(
bpastene3ae09522016-06-10 17:12:59 -07001199 '--bot-file',
1200 help='Path to a file describing the state of the host. The content is '
1201 'defined by on_before_task() in bot_config.')
aludwin7556e0c2016-10-26 08:46:10 -07001202 parser.add_option(
vadimsh9c54b2c2017-07-25 14:08:29 -07001203 '--switch-to-account',
1204 help='If given, switches LUCI_CONTEXT to given logical service account '
1205 '(e.g. "task" or "system") before launching the isolated process.')
1206 parser.add_option(
aludwin0a8e17d2016-10-27 15:57:39 -07001207 '--output', action='append',
1208 help='Specifies an output to return. If no outputs are specified, all '
1209 'files located in $(ISOLATED_OUTDIR) will be returned; '
1210 'otherwise, outputs in both $(ISOLATED_OUTDIR) and those '
1211 'specified by --output option (there can be multiple) will be '
1212 'returned. Note that if a file in OUT_DIR has the same path '
1213 'as an --output option, the --output version will be returned.')
1214 parser.add_option(
aludwin7556e0c2016-10-26 08:46:10 -07001215 '-a', '--argsfile',
1216 # This is actually handled in parse_args; it's included here purely so it
1217 # can make it into the help text.
1218 help='Specify a file containing a JSON array of arguments to this '
1219 'script. If --argsfile is provided, no other argument may be '
1220 'provided on the command line.')
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001221
1222 group = optparse.OptionGroup(parser, 'Data source')
1223 group.add_option(
Marc-Antoine Ruel185ded42015-01-28 20:49:18 -05001224 '-s', '--isolated',
nodir55be77b2016-05-03 09:39:57 -07001225 help='Hash of the .isolated to grab from the isolate server.')
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001226 isolateserver.add_isolate_server_options(group)
1227 parser.add_option_group(group)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001228
Marc-Antoine Ruela57d7db2014-10-15 20:31:19 -04001229 isolateserver.add_cache_options(parser)
nodirbe642ff2016-06-09 15:51:51 -07001230
1231 cipd.add_cipd_options(parser)
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001232
1233 group = optparse.OptionGroup(parser, 'Named caches')
1234 group.add_option(
1235 '--named-cache',
1236 dest='named_caches',
1237 action='append',
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001238 nargs=3,
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001239 default=[],
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001240 help='A named cache to request. Accepts 3 arguments: name, path, hint. '
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001241 'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
1242 'path is a path relative to the run dir where the cache directory '
1243 'must be put to. '
1244 'This option can be specified more than once.')
1245 group.add_option(
1246 '--named-cache-root', default='named_caches',
1247 help='Cache root directory. Default=%default')
1248 parser.add_option_group(group)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001249
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001250 group = optparse.OptionGroup(parser, 'Process containment')
1251 parser.add_option(
1252 '--lower-priority', action='store_true',
1253 help='Lowers the child process priority')
1254 parser.add_option(
1255 '--containment-type', choices=('NONE', 'AUTO', 'JOB_OBJECT'),
1256 default='NONE',
1257 help='Type of container to use')
1258 parser.add_option(
1259 '--limit-processes', type='int', default=0,
1260 help='Maximum number of active processes in the containment')
1261 parser.add_option(
1262 '--limit-total-committed-memory', type='int', default=0,
1263 help='Maximum sum of committed memory in the containment')
1264 parser.add_option_group(group)
1265
1266 group = optparse.OptionGroup(parser, 'Debugging')
1267 group.add_option(
Kenneth Russell61d42352014-09-15 11:41:16 -07001268 '--leak-temp-dir',
1269 action='store_true',
nodirbe642ff2016-06-09 15:51:51 -07001270 help='Deliberately leak isolate\'s temp dir for later examination. '
1271 'Default: %default')
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001272 group.add_option(
marueleb5fbee2015-09-17 13:01:36 -07001273 '--root-dir', help='Use a directory instead of a random one')
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001274 parser.add_option_group(group)
Kenneth Russell61d42352014-09-15 11:41:16 -07001275
Vadim Shtayurae34e13a2014-02-02 11:23:26 -08001276 auth.add_auth_options(parser)
nodirbe642ff2016-06-09 15:51:51 -07001277
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -04001278 parser.set_defaults(cache='cache', cipd_cache='cipd_cache')
nodirbe642ff2016-06-09 15:51:51 -07001279 return parser
1280
1281
def process_named_cache_options(parser, options, time_fn=None):
  """Validates named cache options and returns a NamedCache manager.

  Arguments:
    parser: option parser; parser.error() is called on any invalid value.
    options: parsed options. Reads named_caches (a list of
        (name, path, hint) triples), named_cache_root and min_free_space.
    time_fn: forwarded as-is to local_caching.NamedCache.

  Returns:
    A local_caching.NamedCache rooted at --named-cache-root, or None when
    --named-cache-root is empty.
  """
  if options.named_caches and not options.named_cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')
  for name, path, hint in options.named_caches:
    if not CACHE_NAME_RE.match(name):
      parser.error(
          'cache name %r does not match %r' % (name, CACHE_NAME_RE.pattern))
    if not path:
      parser.error('cache path cannot be empty')
    try:
      # int() instead of long(): long() does not exist on python3, and on
      # python2 int() transparently returns a long for large values.
      int(hint)
    except ValueError:
      parser.error('cache hint must be a number')

  if not options.named_cache_root:
    return None

  # Make these configurable later if there is use case but for now it's fairly
  # safe values.
  # In practice, a fair chunk of bots are already recycled on a daily schedule
  # so this code doesn't have any effect to them, unless they are preloaded
  # with a really old cache.
  policies = local_caching.CachePolicies(
      # 1TiB.
      max_cache_size=1024*1024*1024*1024,
      min_free_space=options.min_free_space,
      max_items=50,
      max_age_secs=MAX_AGE_SECS)
  root_dir = six.text_type(os.path.abspath(options.named_cache_root))
  cache = local_caching.NamedCache(root_dir, policies, time_fn=time_fn)
  # Touch any named caches we're going to use to minimize thrashing
  # between tasks that request some (but not all) of the same named caches.
  cache.touch(*[name for name, _, _ in options.named_caches])
  return cache
1315
1316
def parse_args(args):
  """Parses the command line, expanding --argsfile when it is given.

  Arguments:
    args: list of command line arguments, without the program name.

  Returns:
    Tuple (parser, options, args) where parser is the parser returned by
    create_option_parser() and (options, args) is what it parsed.

  Exits through the argparse error path when --argsfile is combined with any
  other argument.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser
  # even though that parser will never actually get to parse it. This is
  # because --argsfile is exclusive with all other options and arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      # The two literals are concatenated; the trailing space keeps the words
      # "with" and "any" apart.
      file_argparse.error('Can\'t specify --argsfile with '
                          'any other arguments (%s)' % nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        args = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # We don't need to error out here - "args" keeps its original value
      # (only the --argsfile option), which is handed to the real parser
      # below so the problem is reported through the normal option handling.
      print('Couldn\'t read arguments: %s' % e, file=sys.stderr)

  # Even if we failed to read the args, just call the normal parser now since it
  # will print the correct help message.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  return (parser, options, args)
1343
1344
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001345def _calc_named_cache_hint(named_cache, named_caches):
1346 """Returns the expected size of the missing named caches."""
1347 present = named_cache.available
1348 size = 0
1349 for name, _, hint in named_caches:
1350 if name not in present:
1351 hint = long(hint)
1352 if hint > 0:
1353 size += hint
1354 return size
1355
1356
def _is_path_within(path, root):
  """Returns True if |path| is |root| itself or is located below |root|.

  Both arguments must be normalized absolute paths. A bare startswith() check
  is not enough since it would accept a sibling directory sharing the same
  name prefix, e.g. '/b/work-evil' for root '/b/work'.
  """
  # os.path.join(root, '') appends exactly one separator, including when root
  # is the filesystem root.
  return path == root or path.startswith(os.path.join(root, ''))


def main(args):
  """Parses and validates the command line, then runs the task.

  Arguments:
    args: list of command line arguments, without the program name.

  Returns:
    The process exit code: 0 once --clean completed, 1 if cipd.Error,
    local_caching.NamedCacheError or local_caching.NoMoreSpace is raised while
    running, otherwise whatever run_tha_test() returns.

  Invalid option combinations are reported through parser.error().
  """
  # Warning: when --argsfile is used, the strings are unicode instances, when
  # parsed normally, the strings are str instances.
  (parser, options, args) = parse_args(args)

  if not file_path.enable_symlink():
    logging.warning('Symlink support is not enabled')

  named_cache = process_named_cache_options(parser, options)
  # hint is 0 if there's no named cache.
  hint = _calc_named_cache_hint(named_cache, options.named_caches)
  if hint:
    # Increase the --min-free-space value by the hint, and recreate the
    # NamedCache instance so it gets the updated CachePolicy.
    options.min_free_space += hint
    named_cache = process_named_cache_options(parser, options)

  # TODO(crbug.com/932396): Remove this.
  use_go_isolated = (
      options.cipd_enabled and
      # TODO(crbug.com/1045281): windows other than win10 has flaky connection
      # issue.
      (sys.platform != 'win32' or platform.release() == '10'))

  # TODO(maruel): CIPD caches should be defined at an higher level here too, so
  # they can be cleaned the same way.
  if use_go_isolated:
    isolate_cache = None
  else:
    isolate_cache = isolateserver.process_cache_options(options, trim=False)

  caches = []
  if isolate_cache:
    caches.append(isolate_cache)
  if named_cache:
    caches.append(named_cache)
  root = caches[0].cache_dir if caches else six.text_type(os.getcwd())
  if options.clean:
    if options.isolated:
      parser.error('Can\'t use --isolated with --clean.')
    if options.isolate_server:
      parser.error('Can\'t use --isolate-server with --clean.')
    if options.json:
      parser.error('Can\'t use --json with --clean.')
    if options.named_caches:
      parser.error('Can\'t use --named-cache with --clean.')
    # Trim first, then clean.
    local_caching.trim_caches(
        caches,
        root,
        min_free_space=options.min_free_space,
        max_age_secs=MAX_AGE_SECS)
    for c in caches:
      c.cleanup()
    return 0

  # Trim must still be done for the following case:
  # - named-cache was used
  # - some entries, with a large hint, where missing
  # - --min-free-space was increased accordingly, thus trimming is needed
  # Otherwise, this will have no effect, as bot_main calls run_isolated with
  # --clean after each task.
  if hint:
    logging.info('Additional trimming of %d bytes', hint)
    local_caching.trim_caches(
        caches,
        root,
        min_free_space=options.min_free_space,
        max_age_secs=MAX_AGE_SECS)

  if not options.isolated and not args:
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = six.text_type(os.path.abspath(options.root_dir))
  if options.json:
    options.json = six.text_type(os.path.abspath(options.json))

  if any('=' not in i for i in options.env):
    parser.error(
        '--env required key=value form. value can be skipped to delete '
        'the variable')
  options.env = dict(i.split('=', 1) for i in options.env)

  # Maps each --env-prefix variable name to the list of relative paths given
  # for it, in command line order.
  prefixes = {}
  cwd = os.path.realpath(os.getcwd())
  for item in options.env_prefix:
    if '=' not in item:
      parser.error(
          '--env-prefix %r is malformed, must be in the form `VAR=./path`'
          % item)
    key, opath = item.split('=', 1)
    if os.path.isabs(opath):
      parser.error('--env-prefix %r path is bad, must be relative.' % opath)
    opath = os.path.normpath(opath)
    if not _is_path_within(os.path.realpath(os.path.join(cwd, opath)), cwd):
      parser.error(
          '--env-prefix %r path is bad, must be relative and not contain `..`.'
          % opath)
    prefixes.setdefault(key, []).append(opath)
  options.env_prefix = prefixes

  cipd.validate_cipd_options(parser, options)

  install_packages_fn = noop_install_packages
  if options.cipd_enabled:
    install_packages_fn = (
        lambda run_dir, isolated_dir: install_client_and_packages(
            run_dir, cipd.parse_package_args(options.cipd_packages),
            options.cipd_server, options.cipd_client_package,
            options.cipd_client_version, cache_dir=options.cipd_cache,
            isolated_dir=isolated_dir))

  @contextlib.contextmanager
  def install_named_caches(run_dir):
    # WARNING: this function depends on "options" variable defined in the outer
    # function.
    # NOTE(review): this only asserts that run_dir is non-empty; an isinstance
    # check may have been intended - confirm before tightening.
    assert six.text_type(run_dir), repr(run_dir)
    assert os.path.isabs(run_dir), run_dir
    named_caches = [(os.path.join(run_dir, six.text_type(relpath)), name)
                    for name, relpath, _ in options.named_caches]
    for path, name in named_caches:
      named_cache.install(path, name)
    try:
      yield
    finally:
      # Uninstall each named cache, returning it to the cache pool. If an
      # uninstall fails for a given cache, it will remain in the task's
      # temporary space, get cleaned up by the Swarming bot, and be lost.
      #
      # If the Swarming bot cannot clean up the cache, it will handle it like
      # any other bot file that could not be removed.
      for path, name in reversed(named_caches):
        try:
          # uninstall() doesn't trim but does call save() implicitly. Trimming
          # *must* be done manually via periodic 'run_isolated.py --clean'.
          named_cache.uninstall(path, name)
        except local_caching.NamedCacheError:
          # Argument order matches the message: cache name first, then path.
          logging.exception('Error while removing named cache %r at %r. '
                            'The cache will be lost.', name, path)

  extra_args = []
  command = []
  if options.raw_cmd:
    command = args
    if options.relative_cwd:
      a = os.path.normpath(os.path.abspath(options.relative_cwd))
      if not _is_path_within(a, os.getcwd()):
        parser.error(
            '--relative-cwd must not try to escape the working directory')
  else:
    if options.relative_cwd:
      parser.error('--relative-cwd requires --raw-cmd')
    extra_args = args

  containment_type = subprocess42.Containment.NONE
  if options.containment_type == 'AUTO':
    containment_type = subprocess42.Containment.AUTO
  if options.containment_type == 'JOB_OBJECT':
    containment_type = subprocess42.Containment.JOB_OBJECT
  containment = subprocess42.Containment(
      containment_type=containment_type,
      limit_processes=options.limit_processes,
      limit_total_committed_memory=options.limit_total_committed_memory)

  data = TaskData(
      command=command,
      relative_cwd=options.relative_cwd,
      extra_args=extra_args,
      isolated_hash=options.isolated,
      storage=None,
      isolate_cache=isolate_cache,
      outputs=options.output,
      install_named_caches=install_named_caches,
      leak_temp_dir=options.leak_temp_dir,
      root_dir=_to_unicode(options.root_dir),
      hard_timeout=options.hard_timeout,
      grace_period=options.grace_period,
      bot_file=options.bot_file,
      switch_to_account=options.switch_to_account,
      install_packages_fn=install_packages_fn,
      use_go_isolated=use_go_isolated,
      go_cache_dir=options.cache,
      go_cache_policies=local_caching.CachePolicies(
          max_cache_size=options.max_cache_size,
          min_free_space=options.min_free_space,
          max_items=options.max_items,
          max_age_secs=None,
      ),
      env=options.env,
      env_prefix=options.env_prefix,
      lower_priority=bool(options.lower_priority),
      containment=containment)
  try:
    if options.isolate_server:
      server_ref = isolate_storage.ServerRef(
          options.isolate_server, options.namespace)
      storage = isolateserver.get_storage(server_ref)
      with storage:
        data = data._replace(storage=storage)
        # Hashing schemes used by |storage| and |isolate_cache| MUST match.
        assert storage.server_ref.hash_algo == server_ref.hash_algo
        return run_tha_test(data, options.json)
    return run_tha_test(data, options.json)
  except (
      cipd.Error,
      local_caching.NamedCacheError,
      local_caching.NoMoreSpace) as ex:
    # Exceptions have no implicit 'message' attribute on python3; fall back to
    # the exception itself when the attribute is not there.
    print(getattr(ex, 'message', ex), file=sys.stderr)
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001578
1579
if __name__ == '__main__':
  # Script entry point: prepare the process, then exit with main()'s result.
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)