blob: 8c61a1a467449d2815ea3ee4f8dcd84e10a5b232 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +00008run_isolated takes care of setting up a temporary environment, running a
9command, and tearing it down.
nodir55be77b2016-05-03 09:39:57 -070010
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000011It handles downloading and uploading isolated files, mapping CIPD packages and
12reusing stateful named caches.
13
14The isolated files, CIPD packages and named caches are kept as a global LRU
15cache.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050016
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000017Any ${EXECUTABLE_SUFFIX} on the command line or the environment variables passed
18with the --env option will be replaced with ".exe" string on Windows and "" on
19other platforms.
nodirbe642ff2016-06-09 15:51:51 -070020
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000021Any ${ISOLATED_OUTDIR} on the command line or the environment variables passed
22with the --env option will be replaced by the location of a temporary directory
23upon execution of the command specified in the .isolated file. All content
24written to this directory will be uploaded upon termination and the .isolated
25file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070026
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000027Any ${SWARMING_BOT_FILE} on the command line or the environment variables passed
28with the --env option will be replaced by the value of the --bot-file parameter.
29This file is used by a swarming bot to communicate state of the host to tasks.
30It is written to by the swarming bot's on_before_task() hook in the swarming
31server's custom bot_config.py.
32
33See
34https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Magic-Values.md
35for all the variables.
36
37See
38https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/swarming_bot/config/bot_config.py
39for more information about bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040"""
41
Marc-Antoine Ruelf899c482019-10-10 23:32:06 +000042from __future__ import print_function
43
44__version__ = '1.0.1'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000045
aludwin7556e0c2016-10-26 08:46:10 -070046import argparse
maruel064c0a32016-04-05 11:47:15 -070047import base64
iannucci96fcccc2016-08-30 15:52:22 -070048import collections
vadimsh232f5a82017-01-20 19:23:44 -080049import contextlib
Ye Kuangfff1e502020-07-13 13:21:57 +000050import distutils
Sadaf Matinkhoo10743a62018-03-29 16:28:58 -040051import errno
aludwin7556e0c2016-10-26 08:46:10 -070052import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000053import logging
54import optparse
55import os
Takuto Ikuta5c59a842020-01-24 03:05:24 +000056import platform
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040057import re
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000058import sys
59import tempfile
maruel064c0a32016-04-05 11:47:15 -070060import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000061
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000062from utils import tools
63tools.force_local_third_party()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000064
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000065# third_party/
66from depot_tools import fix_encoding
Takuto Ikuta6e2ff962019-10-29 12:35:27 +000067import six
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000068
69# pylint: disable=ungrouped-imports
70import auth
71import cipd
72import isolate_storage
73import isolateserver
74import local_caching
75from libs import luci_context
Vadim Shtayura6b555c12014-07-23 16:22:18 -070076from utils import file_path
maruel12e30012015-10-09 11:55:35 -070077from utils import fs
maruel064c0a32016-04-05 11:47:15 -070078from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040079from utils import logging_utils
Ye Kuang2dd17442020-04-22 08:45:52 +000080from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040081from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050082from utils import subprocess42
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000083
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000084
maruele2f2cb82016-07-13 14:41:03 -070085# Magic variables that can be found in the isolate task command line.
86ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
87EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
88SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'
89
90
csharp@chromium.orgff2a4662012-11-21 20:49:32 +000091# The name of the log file to use.
92RUN_ISOLATED_LOG_FILE = 'run_isolated.log'
93
maruele2f2cb82016-07-13 14:41:03 -070094
maruele2f2cb82016-07-13 14:41:03 -070095# Use short names for temporary directories. This is driven by Windows, which
96# imposes a relatively short maximum path length of 260 characters, often
97# referred to as MAX_PATH. It is relatively easy to create files with longer
Marc-Antoine Ruel793bff32019-04-18 17:50:48 +000098# path length. A use case is with recursive dependency trees like npm packages.
maruele2f2cb82016-07-13 14:41:03 -070099#
100# It is recommended to start the script with a `root_dir` as short as
101# possible.
102# - ir stands for isolated_run
103# - io stands for isolated_out
104# - it stands for isolated_tmp
Takuto Ikutab7ce0e32019-11-27 23:26:18 +0000105# - ic stands for isolated_client
maruele2f2cb82016-07-13 14:41:03 -0700106ISOLATED_RUN_DIR = u'ir'
107ISOLATED_OUT_DIR = u'io'
108ISOLATED_TMP_DIR = u'it'
Takuto Ikutab7ce0e32019-11-27 23:26:18 +0000109ISOLATED_CLIENT_DIR = u'ic'
Junji Watanabe4b890ef2020-09-16 01:43:27 +0000110_CAS_CLIENT_DIR = u'cc'
maruele2f2cb82016-07-13 14:41:03 -0700111
Takuto Ikuta02edca22019-11-29 10:04:51 +0000112# TODO(tikuta): take these parameters from luci-config?
Takuto Ikuta9c4eb1d2020-10-05 03:40:14 +0000113# Update tag by `./client/update_go_clients.sh`.
Takuto Ikutac8c92e62020-04-01 07:07:29 +0000114# Or take revision from
Takuto Ikutab7ce0e32019-11-27 23:26:18 +0000115# https://ci.chromium.org/p/infra-internal/g/infra-packagers/console
Takuto Ikuta02edca22019-11-29 10:04:51 +0000116ISOLATED_PACKAGE = 'infra/tools/luci/isolated/${platform}'
Junji Watanabe4b890ef2020-09-16 01:43:27 +0000117_CAS_PACKAGE = 'infra/tools/luci/cas/${platform}'
Takuto Ikutaee04e2c2021-04-19 08:51:07 +0000118_LUCI_GO_REVISION = 'git_revision:31a4144137d33772f7ded15e54cabad2dea4e560'
maruele2f2cb82016-07-13 14:41:03 -0700119
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400120# Keep synced with task_request.py
Lei Leife202df2019-06-11 17:33:34 +0000121CACHE_NAME_RE = re.compile(r'^[a-z0-9_]{1,4096}$')
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400122
Takuto Ikutac9ddff22021-02-18 07:58:39 +0000123_FREE_SPACE_BUFFER_FOR_CIPD_PACKAGES = 2 * 1024 * 1024 * 1024
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400124
marueld928c862017-06-08 08:20:04 -0700125OUTLIVING_ZOMBIE_MSG = """\
126*** Swarming tried multiple times to delete the %s directory and failed ***
127*** Hard failing the task ***
128
129Swarming detected that your testing script ran an executable, which may have
130started a child executable, and the main script returned early, leaving the
131children executables playing around unguided.
132
133You don't want to leave children processes outliving the task on the Swarming
134bot, do you? The Swarming bot doesn't.
135
136How to fix?
137- For any process that starts children processes, make sure all children
138 processes terminated properly before each parent process exits. This is
139 especially important in very deep process trees.
140 - This must be done properly both in normal successful task and in case of
141 task failure. Cleanup is very important.
142- The Swarming bot sends a SIGTERM in case of timeout.
143 - You have %s seconds to comply after the signal was sent to the process
144 before the process is forcibly killed.
145- To achieve not leaking children processes in case of signals on timeout, you
146 MUST handle signals in each executable / python script and propagate them to
147 children processes.
148 - When your test script (python or binary) receives a signal like SIGTERM or
149 CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for
150 them to terminate before quitting.
151
152See
Marc-Antoine Ruelc7243592018-05-24 17:04:04 -0400153https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#Graceful-termination_aka-the-SIGTERM-and-SIGKILL-dance
marueld928c862017-06-08 08:20:04 -0700154for more information.
155
156*** May the SIGKILL force be with you ***
157"""
158
159
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000160# Currently hardcoded. Eventually could be exposed as a flag once there's value.
161# 3 weeks
162MAX_AGE_SECS = 21*24*60*60
163
Ye Kuang72e6fe82020-08-05 06:30:04 +0000164# TODO(1099655): Enable this once all prod issues are gone.
165_USE_GO_ISOLATED_TO_UPLOAD = False
Marc-Antoine Ruel5d7606b2018-06-15 19:06:12 +0000166
Takuto Ikuta7ff4b242020-12-03 08:07:06 +0000167_CAS_KVS_CACHE_THRESHOLD = 5 * 1024 * 1024 * 1024 # 5 GiB
168
# Immutable bundle of everything needed to run one task. Field order matters
# for positional construction, so keep it stable.
TaskData = collections.namedtuple(
    'TaskData',
    (
        # Command line to run, as a list of strings; used independently of
        # whatever the isolated file specifies.
        'command',
        # Directory, relative to the run directory, to start the command in.
        'relative_cwd',
        # Hash of the .isolated file to fetch in order to recreate the tree of
        # files needed by the target executable. The command specified in the
        # .isolated is executed. Mutually exclusive with command argument.
        'isolated_hash',
        # isolateserver.Storage instance used to retrieve remote objects. It
        # holds a reference to an isolateserver.StorageApi, which performs the
        # actual I/O.
        'storage',
        # isolateserver.LocalCache instance that caches retrieved objects so
        # they are not fetched over and over. Can be on-disk or in-memory.
        'isolate_cache',
        # Digest of the input root on RBE-CAS.
        'cas_digest',
        # Full CAS instance name.
        'cas_instance',
        # Paths, relative to root_dir, to put into the output isolated bundle
        # once the task completes (see link_outputs_to_outdir).
        'outputs',
        # Function (run_dir) => context manager that installs named caches
        # into |run_dir|.
        'install_named_caches',
        # When True, the temporary directory is deliberately leaked so it can
        # be examined later.
        'leak_temp_dir',
        # Directory in which the temporary directory is created. When not
        # specified, a random temporary directory is created.
        'root_dir',
        # The process is killed if it lasts more than this many seconds.
        'hard_timeout',
        # Number of seconds to wait between SIGTERM and SIGKILL.
        'grace_period',
        # Path to a file with bot state, substituted for ${SWARMING_BOT_FILE}
        # in the task command line.
        'bot_file',
        # Logical account to switch LUCI_CONTEXT into.
        'switch_to_account',
        # Context manager dir => CipdInfo, see install_client_and_packages.
        'install_packages_fn',
        # Use go isolated client.
        'use_go_isolated',
        # Cache directory for go `isolated` client.
        'go_cache_dir',
        # Parameters passed to go `isolated` client.
        'go_cache_policies',
        # Cache directory for `cas` client.
        'cas_cache_dir',
        # Parameters passed to `cas` client.
        'cas_cache_policies',
        # Parameters for kvs file used by `cas` client.
        'cas_kvs',
        # Environment variables to set.
        'env',
        # Environment variables to mutate with relative directories.
        # Example: {"ENV_KEY": ['relative', 'paths', 'to', 'prepend']}
        'env_prefix',
        # Lowers the task process priority.
        'lower_priority',
        # subprocess42.Containment instance. Can be None.
        'containment',
    ))
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -0500238
239
Marc-Antoine Ruelee6ca622017-11-29 11:19:16 -0500240def _to_str(s):
241 """Downgrades a unicode instance to str. Pass str through as-is."""
242 if isinstance(s, str):
243 return s
244 # This is technically incorrect, especially on Windows. In theory
245 # sys.getfilesystemencoding() should be used to use the right 'ANSI code
246 # page' on Windows, but that causes other problems, as the character set
247 # is very limited.
248 return s.encode('utf-8')
249
250
def _to_unicode(s):
  """Returns |s| as unicode text.

  None and values that are already text are handed back untouched; anything
  else is decoded as UTF-8.
  """
  if s is None or isinstance(s, six.text_type):
    return s
  return s.decode('utf-8')
256
257
def make_temp_dir(prefix, root_dir):
  """Creates a fresh, uniquely named directory under |root_dir|.

  Returns:
    The path of the new directory, as unicode text.
  """
  created = tempfile.mkdtemp(prefix=prefix, dir=root_dir)
  return six.text_type(created)
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000261
262
@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
  """Makes |account| the default LUCI_CONTEXT account for the task.

  This is a no-op, i.e. the body runs in the unmodified context, when:
  - |account| is None or '' (run_isolated.py was called without
    '--switch-to-account'); the task then simply inherits whatever account is
    already set in the surrounding LUCI_CONTEXT, if any.
  - LUCI_CONTEXT-based auth is not in use at all.
  - |account| is already the default account.

  When the requested account is not listed in the context, authentication is
  disabled instead. This happens for Swarming tasks that don't use 'task'
  service accounts.
  """
  # Only consult LUCI_CONTEXT when a switch was actually requested.
  local_auth = luci_context.read('local_auth') if account else None

  # See LUCI_CONTEXT.md for the format of 'local_auth'.
  if not local_auth or local_auth.get('default_account_id') == account:
    yield
    return

  known_ids = {entry['id'] for entry in local_auth.get('accounts') or []}
  if account not in known_ids:
    logging.warning(
        'Requested LUCI_CONTEXT account %r is not available (have only %r), '
        'disabling authentication', account, sorted(known_ids))
    local_auth.pop('default_account_id', None)
  else:
    logging.info('Switching default LUCI_CONTEXT account to %r', account)
    local_auth['default_account_id'] = account

  with luci_context.write(_tmpdir=tmp_dir, local_auth=local_auth):
    yield
309
310
def process_command(command, out_dir, bot_file):
  """Substitutes the magic parameters in every argument of a command line.

  Returns:
    A new list with each argument of |command| run through
    replace_parameters().

  Raises:
    ValueError if a parameter is requested in |command| but its value is not
    provided.
  """
  processed = []
  for token in command:
    processed.append(replace_parameters(token, out_dir, bot_file))
  return processed
319
320
def replace_parameters(arg, out_dir, bot_file):
  """Substitutes the magic parameter tokens found in a single string.

  ${EXECUTABLE_SUFFIX} is always substituted. ${ISOLATED_OUTDIR} requires
  |out_dir|. ${SWARMING_BOT_FILE} is left untouched, with a warning, when
  |bot_file| is not provided.

  Raises:
    ValueError if a parameter is requested in |arg| but its value is not
    provided.
  """
  arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
  path_substituted = False

  if ISOLATED_OUTDIR_PARAMETER in arg:
    if not out_dir:
      raise ValueError(
          'output directory is requested in command or env var, but not '
          'provided; please specify one')
    arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
    path_substituted = True

  if SWARMING_BOT_FILE_PARAMETER in arg:
    if not bot_file:
      logging.warning('SWARMING_BOT_FILE_PARAMETER found in command or env '
                      'var, but no bot_file specified. Leaving parameter '
                      'unchanged.')
    else:
      arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
      path_substituted = True

  # Slashes are converted only when a path parameter was substituted, to cope
  # with arguments like '${ISOLATED_OUTDIR}/foo/bar'.
  return arg.replace('/', os.sep) if path_substituted else arg
maruela9cfd6f2015-09-15 11:03:15 -0700350
351
def set_temp_dir(env, tmp_dir):
  """Points the temp directory variables of the |env| dict at |tmp_dir|.

  |env| is modified in place.
  """
  value = _to_str(tmp_dir)
  # pylint: disable=line-too-long
  # * python respects $TMPDIR, $TEMP, and $TMP in this order, regardless of
  #   platform. So $TMPDIR must be set on all platforms.
  #   https://github.com/python/cpython/blob/2.7/Lib/tempfile.py#L155
  # * TMPDIR is also the POSIX standard envvar for the temp directory, which is
  #   all that is needed on platforms other than Windows and macOS.
  #   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
  #   * mktemp on linux respects $TMPDIR.
  #   * Chromium respects $TMPDIR on linux.
  #     https://cs.chromium.org/chromium/src/base/files/file_util_posix.cc?q=GetTempDir
  #   * Go uses $TMPDIR.
  #     https://go.googlesource.com/go/+/go1.10.3/src/os/file_unix.go#307
  names = ['TMPDIR']
  if sys.platform == 'win32':
    # * chromium's base utils uses GetTempPath().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_win.cc?q=GetTempPath
    # * Go uses GetTempPath().
    # * GetTempPath() uses %TMP%, then %TEMP%, then other stuff. So %TMP% must
    #   be set.
    #   https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-gettemppathw
    names.append('TMP')
    # https://blogs.msdn.microsoft.com/oldnewthing/20150417-00/?p=44213
    names.append('TEMP')
  elif sys.platform == 'darwin':
    # * Chromium uses a hack on macOS before calling into
    #   NSTemporaryDirectory().
    #   https://cs.chromium.org/chromium/src/base/files/file_util_mac.mm?q=GetTempDir
    #   https://developer.apple.com/documentation/foundation/1409211-nstemporarydirectory
    names.append('MAC_CHROMIUM_TMPDIR')

  for name in names:
    env[name] = value
385
Roberto Carrillo71ade6d2018-10-08 22:30:24 +0000386
def get_command_env(tmp_dir, cipd_info, run_dir, env, env_prefixes, out_dir,
                    bot_file):
  """Returns full OS environment to run a command in.

  Sets up TEMP, puts directory with cipd binary in front of PATH, exposes
  CIPD_CACHE_DIR env var, and installs all env_prefixes.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.
    run_dir: The root directory the isolated tree is mapped in.
    env: environment variables to use. A variable with an empty value is
        removed from the resulting environment.
    env_prefixes: {"ENV_KEY": ['cwd', 'relative', 'paths', 'to', 'prepend']}
    out_dir: Isolated output directory. Required to be != None if any of the
        env vars contain ISOLATED_OUTDIR_PARAMETER.
    bot_file: Required to be != None if any of the env vars contain
        SWARMING_BOT_FILE_PARAMETER.

  Returns:
    A dict based on a copy of os.environ with the changes above applied.

  Raises:
    ValueError if an env var requests ISOLATED_OUTDIR_PARAMETER but |out_dir|
    is not provided.
  """
  out = os.environ.copy()
  for k, v in env.items():
    if not v:
      out.pop(k, None)
    else:
      out[k] = replace_parameters(v, out_dir, bot_file)

  if cipd_info:
    bin_dir = _to_str(os.path.dirname(cipd_info.client.binary_path))
    # PATH may be missing: the host may not define it, or |env| may have just
    # unset it with an empty value. Do not fail in that case, and do not leave
    # a dangling separator (an empty PATH entry means "current directory").
    cur_path = out.get('PATH')
    if cur_path:
      out['PATH'] = '%s%s%s' % (bin_dir, os.pathsep, cur_path)
    else:
      out['PATH'] = bin_dir
    out['CIPD_CACHE_DIR'] = _to_str(cipd_info.cache_dir)
    # Expose the resolved package pins to the task through a json file.
    cipd_info_path = os.path.join(tmp_dir, 'cipd_info.json')
    with open(cipd_info_path, 'w') as f:
      json.dump(cipd_info.pins, f)
    out['ISOLATED_RESOLVED_PACKAGE_VERSIONS_FILE'] = cipd_info_path

  for key, paths in env_prefixes.items():
    assert isinstance(paths, list), paths
    paths = [os.path.normpath(os.path.join(run_dir, p)) for p in paths]
    # The prefixes go in front of the current value, if there is one.
    cur = out.get(key)
    if cur:
      paths.append(cur)
    out[key] = _to_str(os.path.pathsep.join(paths))

  set_temp_dir(out, tmp_dir)
  return out
vadimsh232f5a82017-01-20 19:23:44 -0800431
432
def run_command(
    command, cwd, env, hard_timeout, grace_period, lower_priority, containment):
  """Runs |command| as a detached child process and waits for it.

  The child gets |hard_timeout| seconds to complete (no limit when falsy). On
  timeout, or when a stop signal is received, it is sent SIGTERM and given
  |grace_period| seconds to exit before being killed.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info(
      'run_command(%s, %s, %s, %s, %s, %s)',
      command, cwd, hard_timeout, grace_period, lower_priority, containment)

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list so that the signal handler closure can record into it.
    signal_seen = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows where things can go
      # wrong.
      def on_stop_signal(signum, _frame):
        # Only the first signal received while the child runs interrupts the
        # wait below; it is handled like a timeout.
        if proc and not signal_seen:
          logging.info('Received signal %d', signum)
          signal_seen.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(
          command,
          cwd=cwd,
          env=env,
          detached=True,
          close_fds=True,
          lower_priority=lower_priority,
          containment=containment)
      with subprocess42.set_signal_handler(
          subprocess42.STOP_SIGNALS, on_stop_signal):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          if not signal_seen:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      sigkill_sent = False
      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        def ignore_signal(*_):
          return None

        with subprocess42.set_signal_handler(
            subprocess42.STOP_SIGNALS, ignore_signal):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
            sigkill_sent = True
        logging.info('Waiting for process exit')
        exit_code = proc.wait()

      # The process group / job object may be dangling so if we didn't kill
      # it already, give it a poke now.
      if not sigkill_sent:
        proc.kill()
    except OSError as e:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist, a dependent library is missing or '
          'the command line is too long>\n'
          '<Check for missing .so/.dll in the .isolate or GN file or length of '
          'command line args>\n'
          '<Command: %s>\n'
          '<Exception: %s>\n' % (command, e))
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700517
518
def _run_go_cmd_and_wait(cmd, tmp_dir):
  """
  Runs an external Go command, `isolated` or `cas`, and wait for its completion.

  While this is a generic function to launch a subprocess, it has logic that
  is specific to Go `isolated` and `cas` for waiting and logging.

  The command runs with its temp directory variables pointing at |tmp_dir|.

  Returns:
    The subprocess object

  Raises:
    subprocess42.CalledProcessError if the command exits with a non-zero code.
    ValueError if the command is still running after the maximum wait time.
  """
  cmd_str = ' '.join(cmd)
  try:
    child_env = os.environ.copy()
    set_temp_dir(child_env, tmp_dir)
    proc = subprocess42.Popen(cmd, env=child_env)

    check_period_sec = 30
    max_checks = 100
    # max timeout = max_checks * check_period_sec = 50 minutes
    for attempt in range(1, max_checks + 1):
      # Wait in short slices and print in between. This is to prevent I/O
      # timeout error during setup.
      try:
        retcode = proc.wait(check_period_sec)
      except subprocess42.TimeoutExpired:
        print('still running (after %d seconds)' % (attempt * check_period_sec))
        continue
      if retcode != 0:
        raise subprocess42.CalledProcessError(retcode, cmd=cmd_str)
      break
    else:
      # Every wait slice timed out: the command exceeded the max timeout.
      proc.terminate()
      try:
        proc.wait(check_period_sec)
      except subprocess42.TimeoutExpired:
        logging.exception(
            "failed to terminate? timeout happened after %d seconds",
            check_period_sec)
        proc.kill()
        proc.wait()
      # Raise unconditionally, because |proc| was forcefully terminated.
      raise ValueError("timedout after %d seconds (cmd=%s)" %
                       (check_period_sec * max_checks, cmd_str))

    return proc
  except Exception:
    logging.exception('Failed to run Go cmd %s', cmd_str)
    raise
Ye Kuangc0cf9ca2020-07-16 08:56:51 +0000568
569
def _fetch_and_map_with_cas(cas_client, digest, instance, output_dir, cache_dir,
                            policies, kvs_dir, tmp_dir):
  """
  Fetches a CAS tree using cas client, create the tree and returns download
  stats.

  When a kvs directory is in use and the download fails, the kvs directory and
  the output directory are removed and the download is attempted once more.

  Returns:
    A dict with the keys 'duration', 'items_cold' and 'items_hot'.
  """
  started_at = time.time()
  stats_fd, stats_path = tempfile.mkstemp(
      prefix=u'fetch-and-map-result-', suffix=u'.json')
  os.close(stats_fd)
  profile_dir = tempfile.mkdtemp(dir=tmp_dir)

  try:
    cmd = [cas_client, 'download']
    cmd += ['-digest', digest]
    cmd += ['-cas-instance', instance]
    # flags for cache.
    cmd += ['-cache-dir', cache_dir]
    cmd += ['-cache-max-size', str(policies.max_cache_size)]
    cmd += ['-cache-min-free-space', str(policies.min_free_space)]
    # flags for output.
    cmd += ['-dir', output_dir]
    cmd += ['-dump-stats-json', stats_path]
    cmd += ['-log-level', 'info']

    # cpu profile may not work fast on armv7l.
    # https://crbug.com/1197523#c10
    do_profile = platform.machine() != 'armv7l'
    if do_profile:
      cmd += [
          '-profile-output-dir',
          profile_dir,
          '-profile-cpu',
          '-profile-trace',
      ]

    if kvs_dir:
      cmd += ['-kvs-dir', kvs_dir]

    try:
      _run_go_cmd_and_wait(cmd, tmp_dir)
    except subprocess42.CalledProcessError:
      # Without a kvs cache to blame there is nothing to retry.
      if not kvs_dir:
        raise
      logging.exception('Failed to run cas, removing kvs cache dir and retry.')

      file_path.rmtree(kvs_dir)
      file_path.rmtree(output_dir)
      _run_go_cmd_and_wait(cmd, tmp_dir)

    elapsed = time.time() - started_at
    if elapsed >= 30 and do_profile:
      # If downloading takes long time, upload profile for later performance
      # analysis.
      archive_cmd = [
          cas_client, 'archive', '-cas-instance', instance, '-paths',
          profile_dir + ':.'
      ]
      subprocess42.check_call(archive_cmd)

    with open(stats_path) as stats_file:
      stats = json.load(stats_file)

    return {
        'duration': time.time() - started_at,
        'items_cold': stats['items_cold'],
        'items_hot': stats['items_hot'],
    }
  finally:
    fs.remove(stats_path)
    file_path.rmtree(profile_dir)
Junji Watanabe4b890ef2020-09-16 01:43:27 +0000652
653
def _fetch_and_map_with_go_isolated(isolated_hash, storage, outdir,
                                    go_cache_dir, policies, isolated_client,
                                    tmp_dir):
  """Downloads and maps an isolated tree with the Go `isolated` client.

  Runs `<isolated_client> download` through _run_go_cmd_and_wait() and asks the
  client to dump its statistics into a temporary JSON file. That file is
  removed before returning, whether or not the download succeeded.

  Arguments:
  - isolated_hash: value passed to the client's -isolated flag.
  - storage: object whose server_ref provides the `url` and `namespace` of the
    isolate server.
  - outdir: directory passed to the client's -output-dir flag.
  - go_cache_dir: directory passed to the client's -cache-dir flag.
  - policies: object providing max_items, max_cache_size and min_free_space.
  - isolated_client: path of the Go `isolated` executable.
  - tmp_dir: forwarded unchanged to _run_go_cmd_and_wait().

  Returns:
    dict with 'duration' (seconds spent in this function) and 'items_cold',
    'items_hot', 'initial_number_items', 'initial_size' copied from the JSON
    file written by the client.
  """
  began = time.time()
  ref = storage.server_ref
  stats_fd, stats_path = tempfile.mkstemp(
      prefix=u'fetch-and-map-result-', suffix=u'.json')
  # Only the path is needed here, the Go client writes the file itself.
  os.close(stats_fd)
  try:
    server_flags = [
        '-isolate-server',
        ref.url,
        '-namespace',
        ref.namespace,
        '-isolated',
        isolated_hash,
    ]
    cache_flags = [
        '-cache-dir',
        go_cache_dir,
        '-cache-max-items',
        str(policies.max_items),
        '-cache-max-size',
        str(policies.max_cache_size),
        '-cache-min-free-space',
        str(policies.min_free_space),
    ]
    output_flags = [
        '-output-dir',
        outdir,
        '-fetch-and-map-result-json',
        stats_path,
    ]
    download_cmd = [isolated_client, 'download']
    download_cmd += server_flags
    download_cmd += cache_flags
    download_cmd += output_flags
    _run_go_cmd_and_wait(download_cmd, tmp_dir)

    with open(stats_path) as stats_file:
      client_stats = json.load(stats_file)

    fetched = {'duration': time.time() - began}
    for key in ('items_cold', 'items_hot', 'initial_number_items',
                'initial_size'):
      fetched[key] = client_stats[key]
    return fetched
  finally:
    fs.remove(stats_path)
707
708
709# TODO(crbug.com/932396): remove this function.
def fetch_and_map(isolated_hash, storage, cache, outdir):
  """Fetches an isolated tree with the Python client and returns stats.

  The tree is created in |outdir| by isolateserver.fetch_isolated(), without
  symlinks.

  Returns:
    dict with:
    - 'duration': seconds spent fetching.
    - 'items_cold': base64 of large.pack() of the sorted cache.added values.
    - 'items_hot': same encoding for the values present in cache.used but not
      accounted for by cache.added (multiset difference).
  """

  def encode(values):
    packed = large.pack(sorted(values))
    return base64.b64encode(packed).decode()

  began = time.time()
  isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=False)
  used = collections.Counter(cache.used)
  added = collections.Counter(cache.added)
  hot_items = (used - added).elements()
  # The duration is sampled before the stats are packed.
  elapsed = time.time() - began
  cold_encoded = encode(cache.added)
  hot_encoded = encode(hot_items)
  return {
      'duration': elapsed,
      'items_cold': cold_encoded,
      'items_hot': hot_encoded,
  }
726
727
def link_outputs_to_outdir(run_dir, out_dir, outputs):
  """Makes the named outputs available in out_dir so they can be uploaded.

  Does nothing when |outputs| is empty. Otherwise the directories needed for
  |outputs| are created under |out_dir|, then each output is copied (or
  hardlinked) from |run_dir| with copy_recursively().

  Raises an error if the file already exists in that directory.
  """
  if outputs:
    isolateserver.create_directories(out_dir, outputs)
    for relpath in outputs:
      source = os.path.join(run_dir, relpath)
      destination = os.path.join(out_dir, relpath)
      copy_recursively(source, destination)
738
739
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from src to dst.

  `src` may be a file, directory, or a symlink to a file or directory.
  All symlinks are replaced with their targets, so the resulting
  directory structure in dst will never have any symlinks.

  To increase speed, copy_recursively hardlinks individual files into the
  (newly created) directory structure if possible, unlike Python's
  shutil.copytree().

  OSError is never propagated to the caller: a missing path or broken symlink
  is logged as a warning, any other OSError (including a symlink chain that is
  too long or cyclic) is logged at info level, and the copy of that entry is
  abandoned.

  Arguments:
  - src: path to copy from.
  - dst: path to copy to.
  """
  # Upper bound on the number of symlinks followed for a single entry, so that
  # a cyclic chain (e.g. a link pointing at itself) cannot spin forever. 40 is
  # the limit Linux applies when resolving a path.
  max_symlink_hops = 40
  orig_src = src
  try:
    # Replace symlinks with their final target.
    hops = 0
    while fs.islink(src):
      hops += 1
      if hops > max_symlink_hops:
        # Reported through the generic OSError handler below, the same way the
        # OS itself would report the condition.
        raise OSError(errno.ELOOP, 'Too many levels of symbolic links',
                      orig_src)
      res = fs.readlink(src)
      # A relative target is relative to the directory holding the link;
      # os.path.join() returns |res| unchanged when it is absolute.
      src = os.path.join(os.path.dirname(src), res)

    # Note that fs.isfile (which is a wrapper around os.path.isfile) throws
    # an exception if src does not exist. A warning will be logged in that case.
    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      return

    if not fs.exists(dst):
      os.makedirs(dst)

    for child in fs.listdir(src):
      copy_recursively(os.path.join(src, child), os.path.join(dst, child))

  except OSError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, orig_src)
    else:
      logging.info("Couldn't collect output file %s: %s", src, e)
aludwin0a8e17d2016-10-27 15:57:39 -0700777
778
def _upload_with_py(storage, out_dir):
  """Uploads |out_dir| to |storage| using the Python isolateserver client.

  Returns:
    tuple(isolated, cold, hot)
    - isolated: first value of the results mapping returned by
      isolateserver.archive_files_to_storage().
    - cold, hot: base64 of large.pack() of the sorted `size` attributes of the
      cold and hot items reported by the archival.

  Raises:
    isolateserver.Aborted: re-raised after writing a note to stderr.
  """

  def encode_sizes(items):
    sizes = [item.size for item in items]
    sizes.sort()
    return base64.b64encode(large.pack(sizes)).decode()

  try:
    archived, cold_items, hot_items = isolateserver.archive_files_to_storage(
        storage, [out_dir], None, verify_push=True)

    isolated = list(archived.values())[0]
    cold = encode_sizes(cold_items)
    hot = encode_sizes(hot_items)
    return isolated, cold, hot

  except isolateserver.Aborted:
    # A SIGTERM was received while uploading data. There are 2 causes:
    # - The task was too slow and was about to be killed anyway due to
    #   exceeding the hard timeout.
    # - The amount of data uploaded back is very large and took too much
    #   time to archive.
    sys.stderr.write('Received SIGTERM while uploading')
    # Propagate, so it will be treated as an internal failure.
    raise
804
805
def _upload_with_go(storage, outdir, isolated_client, tmp_dir=None):
  """Uploads results back using the Go `isolated` CLI.

  Runs `<isolated_client> archive` on |outdir|. The command is retried with an
  exponential backoff (10, 20, 40... seconds) when it fails, until roughly two
  minutes have elapsed since the first attempt; the last error is then
  re-raised.

  Arguments:
  - storage: object whose server_ref provides the `url` and `namespace` of the
    isolate server.
  - outdir: directory to archive.
  - isolated_client: path of the Go `isolated` executable.
  - tmp_dir: directory forwarded to _run_go_cmd_and_wait(). Defaults to
    tempfile.gettempdir() so that existing three-argument callers keep
    working.

  Returns:
    tuple(isolated, items_cold, items_hot)
    - isolated: content of the file written by the client for -dump-hash.
    - items_cold, items_hot: values of the same names read from the JSON file
      written by the client for -dump-stats-json.
  """
  if tmp_dir is None:
    # The name `tmp_dir` used to be referenced without being defined in this
    # function, which made every attempt fail with NameError.
    tmp_dir = tempfile.gettempdir()

  server_ref = storage.server_ref
  isolated_handle, isolated_path = tempfile.mkstemp(
      prefix=u'isolated-hash-', suffix=u'.txt')
  stats_json_handle, stats_json_path = tempfile.mkstemp(
      prefix=u'dump-stats-', suffix=u'.json')
  # Only the paths are needed, the Go client writes the files itself.
  os.close(isolated_handle)
  os.close(stats_json_handle)
  try:
    cmd = [
        isolated_client,
        'archive',
        '-isolate-server',
        server_ref.url,
        '-namespace',
        server_ref.namespace,
        '-dirs',
        # Format: <working directory>:<relative path to dir>
        outdir + ':',

        # output
        '-dump-hash',
        isolated_path,
        '-dump-stats-json',
        stats_json_path,
        '-quiet',
    ]
    # Will do exponential backoff, e.g. 10, 20, 40...
    # This mitigates https://crbug.com/1094369, where there is a data race on
    # the uploaded files.
    backoff = 10
    started = time.time()
    while True:
      try:
        _run_go_cmd_and_wait(cmd, tmp_dir)
        break
      except Exception:
        if time.time() > started + 60 * 2:
          # This is to not wait task having leaked process long time.
          raise

        on_error.report('error before %d second backoff' % backoff)
        logging.exception(
            '_run_go_cmd_and_wait() failed, will retry after %d seconds',
            backoff)
        time.sleep(backoff)
        backoff *= 2

    with open(isolated_path) as isol_file:
      isolated = isol_file.read()
    with open(stats_json_path) as json_file:
      stats_json = json.load(json_file)

    return isolated, stats_json['items_cold'], stats_json['items_hot']
  finally:
    fs.remove(isolated_path)
    fs.remove(stats_json_path)
866
867
def upload_out_dir(storage, out_dir, go_isolated_client):
  """Uploads the results in |out_dir| back, if there is any.

  The upload, and the generation of the .isolated file describing the
  directory, only happens when |out_dir| is a non-empty directory. The Go
  client is used when _USE_GO_ISOLATED_TO_UPLOAD is set and
  |go_isolated_client| is not None, the Python client otherwise.

  Returns:
    tuple(outputs_ref, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
        server, if applicable, else None.
    - stats: uploading stats ('duration', 'items_cold', 'items_hot'). Both
        items are '' when nothing was uploaded.
  """
  began = time.time()
  outputs_ref = None
  cold, hot = '', ''

  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      if _USE_GO_ISOLATED_TO_UPLOAD and go_isolated_client is not None:
        uploaded = _upload_with_go(storage, out_dir, go_isolated_client)
      else:
        uploaded = _upload_with_py(storage, out_dir)
      isolated, cold, hot = uploaded
      outputs_ref = {
          'isolated': isolated,
          'isolatedserver': storage.server_ref.url,
          'namespace': storage.server_ref.namespace,
      }

  elapsed = time.time() - began
  return outputs_ref, {
      'duration': elapsed,
      'items_cold': cold,
      'items_hot': hot,
  }
maruela9cfd6f2015-09-15 11:03:15 -0700904
905
def upload_outdir_with_cas(cas_client, cas_instance, outdir, tmp_dir):
  """Uploads the results in |outdir| with the `cas` client.

  Runs `<cas_client> archive` through _run_go_cmd_and_wait(). The client dumps
  the root digest and its statistics into two temporary files, which are
  removed before returning.

  Arguments:
  - cas_client: path of the `cas` executable.
  - cas_instance: value passed to the client's -cas-instance flag.
  - outdir: directory to archive.
  - tmp_dir: forwarded unchanged to _run_go_cmd_and_wait().

  Returns:
    tuple(root_digest, stats)
    - root_digest: dict with 'cas_instance' and 'digest' ('hash' and
        'size_bytes') describing the uploaded directory.
    - stats: uploading stats as written by the client, plus 'duration'.
  """
  digest_fd, digest_path = tempfile.mkstemp(
      prefix=u'cas-digest', suffix=u'.txt')
  os.close(digest_fd)
  stats_fd, stats_path = tempfile.mkstemp(
      prefix=u'upload-stats', suffix=u'.json')
  os.close(stats_fd)

  try:
    archive_cmd = [cas_client, 'archive']
    archive_cmd += ['-cas-instance', cas_instance]
    # Format: <working directory>:<relative path to dir>
    archive_cmd += ['-paths', outdir + ':']
    # Files the client writes its results into.
    archive_cmd += ['-dump-digest', digest_path]
    archive_cmd += ['-dump-stats-json', stats_path]

    began = time.time()

    _run_go_cmd_and_wait(archive_cmd, tmp_dir)

    with open(digest_path) as digest_file:
      raw_digest = digest_file.read()
    # The digest is formatted as <hash>/<size in bytes>.
    digest_hash, digest_size = raw_digest.split('/')
    root_digest = {
        'cas_instance': cas_instance,
        'digest': {
            'hash': digest_hash,
            'size_bytes': int(digest_size),
        },
    }
    with open(stats_path) as stats_file:
      upload_stats = json.load(stats_file)

    upload_stats['duration'] = time.time() - began

    return root_digest, upload_stats
  finally:
    fs.remove(digest_path)
    fs.remove(stats_path)
960
961
def map_and_run(data, constant_run_path):
  """Runs a command with optional isolated input/output.

  Creates the run, output, temporary and client directories under
  data.root_dir, installs the CIPD packages, fetches the inputs (isolated tree
  or CAS digest), runs data.command with the named caches installed, uploads
  the content of the output directory and finally deletes the directories.

  Arguments:
  - data: TaskData instance.
  - constant_run_path: if true (and data.root_dir is set), the command runs in
    the fixed directory <root_dir>/ISOLATED_RUN_DIR, which is wiped first when
    it already exists; otherwise a new directory is created with
    make_temp_dir().

  Returns:
    dict of metadata about the result: 'duration', 'exit_code',
    'had_hard_timeout', 'internal_failure' (None unless run_isolated itself
    failed), 'stats', 'outputs_ref', 'cas_output_root', 'version' and, when
    CIPD packages were installed, 'cipd_pins'.
  """

  if data.isolate_cache:
    download_stats = {
        #'duration': 0.,
        'initial_number_items': len(data.isolate_cache),
        'initial_size': data.isolate_cache.total_size,
        #'items_cold': '<large.pack()>',
        #'items_hot': '<large.pack()>',
    }
  else:
    # TODO(tikuta): take stats from state.json in this case too.
    download_stats = {}

  # 'internal_failure' starts non-None so that a result obtained before the
  # command completed is reported as an internal failure.
  result = {
      'duration': None,
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'run_isolated did not complete properly',
      'stats': {
          #'cipd': {
          #  'duration': 0.,
          #  'get_client_duration': 0.,
          #},
          'isolated': {
              'download': download_stats,
              #'upload': {
              #  'duration': 0.,
              #  'items_cold': '<large.pack()>',
              #  'items_hot': '<large.pack()>',
              #},
          },
      },
      #'cipd_pins': {
      #  'packages': [
      #    {'package_name': ..., 'version': ..., 'path': ...},
      #    ...
      #  ],
      # 'client_package': {'package_name': ..., 'version': ...},
      #},
      'outputs_ref': None,
      'cas_output_root': None,
      'version': 5,
  }

  assert os.path.isabs(data.root_dir), ("data.root_dir is not abs path: %s" %
                                        data.root_dir)
  file_path.ensure_tree(data.root_dir, 0o700)

  # See comment for these constants.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and data.root_dir:
    run_dir = os.path.join(data.root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir, 0o700)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, data.root_dir)

  # True if CAS is used for download/upload files.
  use_cas = bool(data.cas_digest)

  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = None
  if data.storage or use_cas:
    out_dir = make_temp_dir(ISOLATED_OUT_DIR, data.root_dir)
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, data.root_dir)
  isolated_client_dir = make_temp_dir(ISOLATED_CLIENT_DIR, data.root_dir)
  cwd = run_dir
  if data.relative_cwd:
    cwd = os.path.normpath(os.path.join(cwd, data.relative_cwd))
  command = data.command
  go_isolated_client = None
  if data.use_go_isolated:
    go_isolated_client = os.path.join(isolated_client_dir,
                                      'isolated' + cipd.EXECUTABLE_SUFFIX)

  cas_client = None
  cas_client_dir = make_temp_dir(_CAS_CLIENT_DIR, data.root_dir)
  if use_cas:
    cas_client = os.path.join(cas_client_dir, 'cas' + cipd.EXECUTABLE_SUFFIX)

  try:
    with data.install_packages_fn(run_dir, isolated_client_dir,
                                  cas_client_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      isolated_stats = result['stats'].setdefault('isolated', {})
      if data.isolated_hash:
        if data.use_go_isolated:
          stats = _fetch_and_map_with_go_isolated(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              outdir=run_dir,
              go_cache_dir=data.go_cache_dir,
              policies=data.go_cache_policies,
              isolated_client=go_isolated_client,
              tmp_dir=tmp_dir)
        else:
          stats = fetch_and_map(
              isolated_hash=data.isolated_hash,
              storage=data.storage,
              cache=data.isolate_cache,
              outdir=run_dir)
        isolated_stats['download'].update(stats)

      elif data.cas_digest:
        stats = _fetch_and_map_with_cas(
            cas_client=cas_client,
            digest=data.cas_digest,
            instance=data.cas_instance,
            output_dir=run_dir,
            cache_dir=data.cas_cache_dir,
            policies=data.cas_cache_policies,
            kvs_dir=data.cas_kvs,
            tmp_dir=tmp_dir)
        isolated_stats['download'].update(stats)

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        # Clear the placeholder, otherwise the result would still be reported
        # as an internal failure.
        result['internal_failure'] = None
        return result

      if not cwd.startswith(run_dir):
        # Handle this as a task failure, not an internal failure. This is a
        # 'last chance' way to gate against directory escape.
        sys.stderr.write('<Relative CWD is outside of run directory!>\n')
        result['exit_code'] = 1
        # Same as above: this is the fault of the task, not of run_isolated.
        result['internal_failure'] = None
        return result

      if not os.path.isdir(cwd):
        # Accepts relative_cwd that does not exist.
        os.makedirs(cwd, 0o700)

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if data.storage and data.outputs:
        isolateserver.create_directories(run_dir, data.outputs)

      with data.install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(data.switch_to_account, tmp_dir):
            env = get_command_env(
                tmp_dir, cipd_info, run_dir, data.env, data.env_prefix, out_dir,
                data.bot_file)
            command = tools.find_executable(command, env)
            command = process_command(command, out_dir, data.bot_file)
            file_path.ensure_command_has_abs_path(command, cwd)

            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, env, data.hard_timeout, data.grace_period,
                data.lower_priority, data.containment)
        finally:
          result['duration'] = max(time.time() - start, 0)

      if out_dir:
        # Try to link files to the output directory, if specified.
        link_outputs_to_outdir(run_dir, out_dir, data.outputs)
        isolated_stats = result['stats'].setdefault('isolated', {})
        if use_cas:
          result['cas_output_root'], isolated_stats['upload'] = (
              upload_outdir_with_cas(cas_client, data.cas_instance, out_dir,
                                     tmp_dir))
        else:
          # This could use |go_isolated_client|, so make sure it runs when the
          # CIPD package still exists.
          result['outputs_ref'], isolated_stats['upload'] = (
              upload_out_dir(data.storage, out_dir, go_isolated_client))
      # We successfully ran the command, set internal_failure back to
      # None (even if the command failed, it's not an internal error).
      result['internal_failure'] = None
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  # Clean up
  finally:
    try:
      success = True
      if data.leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows rmtree(run_dir) call below has a synchronization effect: it
        # finishes only when all task child processes terminate (since a running
        # process locks *.exe file). Examine out_dir only after that call
        # completes (since child processes may write to out_dir too and we need
        # to wait for them to finish).
        dirs_to_remove = [run_dir, tmp_dir, isolated_client_dir, cas_client_dir]
        if out_dir:
          dirs_to_remove.append(out_dir)
        for directory in dirs_to_remove:
          if not fs.isdir(directory):
            continue
          start = time.time()
          # Tracked per directory so that one failure is not reported again
          # for every directory processed after it.
          removed = True
          try:
            file_path.rmtree(directory)
          except OSError as e:
            logging.error('rmtree(%r) failed: %s', directory, e)
            removed = False
          finally:
            logging.info('Cleanup: rmtree(%r) took %d seconds', directory,
                         time.time() - start)
          if removed:
            continue
          success = False
          sys.stderr.write(
              OUTLIVING_ZOMBIE_MSG % (directory, data.grace_period))
          # Dump the process list to help finding what still holds the
          # directory. tasklist.exe only exists on Windows.
          if sys.platform == 'win32':
            list_processes_cmd = ['tasklist.exe', '/V']
          else:
            list_processes_cmd = ['ps', 'axu']
          subprocess42.check_call(list_processes_cmd, stdout=sys.stderr)
          if result['exit_code'] == 0:
            result['exit_code'] = 1

      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
      on_error.report(None)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05001204
1205
def run_tha_test(data, result_json):
  """Runs an executable and records execution metadata.

  If isolated_hash is specified, downloads the dependencies in the cache,
  hardlinks them into a temporary directory and runs the command specified in
  the .isolated.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  When |result_json| is not set and outputs were archived, a
  [run_isolated_out_hack] line describing them is printed to stdout.

  Arguments:
  - data: TaskData instance.
  - result_json: File path to dump result metadata into. If set, the process
    exit code is always 0 unless an internal error occurred.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Dump a placeholder right away, so that a file exists even if the process
    # gets killed before completion.
    placeholder = {
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': 'Was terminated before completion',
        'outputs_ref': None,
        'cas_output_root': None,
        'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  # The exit code of run_isolated depends on if result_json is used or not.
  result = map_and_run(data, True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))
  internal_failure_code = int(bool(result['internal_failure']))

  if result_json:
    # We've found tests to delete 'work' when quitting, causing an exception
    # here. Try to recreate the directory if necessary.
    file_path.ensure_tree(os.path.dirname(result_json))
    tools.write_json(result_json, result, dense=True)
    # Only return 1 if there was an internal error.
    return internal_failure_code

  # Marshall into old-style inline output.
  outputs_ref = result['outputs_ref']
  if outputs_ref:
    # pylint: disable=unsubscriptable-object
    inline_output = {
        'hash': outputs_ref['isolated'],
        'namespace': outputs_ref['namespace'],
        'storage': outputs_ref['isolatedserver'],
    }
    sys.stdout.flush()
    print('[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
          tools.format_json(inline_output, dense=True))
    sys.stdout.flush()
  return result['exit_code'] or internal_failure_code
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001262
1263
# Value yielded by 'install_client_and_packages'.
CipdInfo = collections.namedtuple(
    'CipdInfo',
    (
        # cipd.CipdClient object.
        'client',
        # Absolute path to bot-global cipd tag and instance cache.
        'cache_dir',
        # Dict with stats to return to the server.
        'stats',
        # Dict with installed cipd pins to return to the server.
        'pins',
    ))
1271
1272
@contextlib.contextmanager
def noop_install_packages(_run_dir, _isolated_dir, _cas_dir):
  """Stands in for 'install_client_and_packages' when cipd is disabled.

  Installs nothing, ignores all its arguments and yields None instead of a
  CipdInfo.
  """
  yield
1277
1278
def _install_packages(run_dir, cipd_cache_dir, client, packages):
  """Installs |packages| under |run_dir| with a single 'cipd ensure' call.

  Args:
    run_dir (str): root of installation.
    cipd_cache_dir (str): directory handed to the client as its package cache.
    client (CipdClient): the cipd client to use.
    packages: packages to install, list [(path, package_name, version), ...].

  Returns:
    List of pinned packages; the pin of packages[i] is stored at index i.
    Looks like [
      {
        'path': 'subdirectory',
        'package_name': 'resolved/package/name',
        'version': 'deadbeef...',
      },
      ...
    ]
  """
  resolved = [None] * len(packages)

  # Group the requests per subdirectory, remembering the position of each one
  # in |packages| so that its pin can be reported at the same index.
  requests = collections.defaultdict(list)
  for position, (path, name, version) in enumerate(packages):
    # cipd deals with 'root' as ''
    subdir = '' if path == '.' else path
    requests[subdir].append((name, version, position))

  ensure_spec = {}
  for subdir, entries in requests.items():
    ensure_spec[subdir] = [(name, version) for name, version, _ in entries]
  pins = client.ensure(run_dir, ensure_spec, cache_dir=cipd_cache_dir)

  for subdir, pin_list in sorted(pins.items()):
    entries = requests[subdir]
    # The n-th pin of a subdirectory is matched with the n-th request made for
    # that subdirectory.
    for offset, (name, version) in enumerate(pin_list):
      resolved[entries[offset][2]] = {
          'package_name': name,
          # swarming deals with 'root' as '.'
          'path': subdir or '.',
          'version': version,
      }

  assert None not in resolved, (packages, pins, resolved)

  return resolved
1330
1331
@contextlib.contextmanager
def install_client_and_packages(run_dir, packages, service_url,
                                client_package_name, client_version, cache_dir,
                                isolated_dir, cas_dir):
  """Bootstraps the CIPD client, then installs the requested CIPD packages.

  Yields a single CipdInfo object carrying the CipdClient, the cipd cache
  directory, timing stats and pins.

  Pins and the CIPD client info are in the form of:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  (the CIPD client info is a single dictionary instead of a list)

  such that they correspond 1:1 to all input package arguments from the command
  line. These dictionaries make their way all the way back to swarming, where
  they become the arguments of CipdPackage.

  If 'packages' list is empty, will bootstrap CIPD client, but won't install
  any packages.

  The bootstrapped client (regardless whether 'packages' list is empty or not),
  will be made available to the task via $PATH.

  The isolated and cas clients are always installed, into |isolated_dir| and
  |cas_dir| respectively; their pins are not reported.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
        "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    isolated_dir (str): where to download isolated client.
    cas_dir (str): where to download cas client.
  """
  assert cache_dir

  started = time.time()

  cache_dir = os.path.abspath(cache_dir)
  # Tag and instance caches live in a subdirectory of the cache directory.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')
  run_dir = os.path.abspath(run_dir)
  if not packages:
    packages = []

  client_started = time.time()
  with cipd.get_client(cache_dir, service_url, client_package_name,
                       client_version) as client:
    get_client_duration = time.time() - client_started

    if packages:
      package_pins = _install_packages(run_dir, cipd_cache_dir, client,
                                       packages)
    else:
      package_pins = []

    # Install the isolated client to |isolated_dir|, then the cas client to
    # |cas_dir|. Both are pinned to the same luci-go revision.
    go_tools = (
        (isolated_dir, ISOLATED_PACKAGE),
        (cas_dir, _CAS_PACKAGE),
    )
    for tool_dir, tool_package in go_tools:
      _install_packages(tool_dir, cipd_cache_dir, client,
                        [('', tool_package, _LUCI_GO_REVISION)])

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - started
    logging.info('Installing CIPD client and packages took %d seconds',
                 total_duration)

    stats = {
        'duration': total_duration,
        'get_client_duration': get_client_duration,
    }
    pins = {
        'client_package': {
            'package_name': client.package_name,
            'version': client.instance_id,
        },
        'packages': package_pins,
    }
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -07001419
1420
def create_option_parser():
  """Builds the option parser declaring every run_isolated command line flag.

  Returns:
    The logging_utils.OptionParserWithLogging instance, with all option groups
    registered and the 'cache' default set to 'cache'.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)
  parser.add_option(
      '--clean',
      action='store_true',
      help='Cleans the cache, trimming if necessary and remove corrupted items '
      'and returns without executing anything; use with -v to know what '
      'was done')
  parser.add_option(
      '--json',
      help='dump output metadata to json file. When used, run_isolated returns '
      'non-zero only on internal failure')
  parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  parser.add_option(
      '--grace-period',
      type='float',
      help='Grace period between SIGTERM and SIGKILL')
  parser.add_option(
      '--relative-cwd',
      help='Ignore the isolated \'relative_cwd\' and use this one instead')
  parser.add_option(
      '--env',
      default=[],
      action='append',
      help='Environment variables to set for the child process')
  parser.add_option(
      '--env-prefix',
      default=[],
      action='append',
      help='Specify a VAR=./path/fragment to put in the environment variable '
      'before executing the command. The path fragment must be relative '
      'to the isolated run directory, and must not contain a `..` token. '
      'The path will be made absolute and prepended to the indicated '
      '$VAR using the OS\'s path separator. Multiple items for the same '
      '$VAR will be prepended in order.')
  parser.add_option(
      '--bot-file',
      help='Path to a file describing the state of the host. The content is '
      'defined by on_before_task() in bot_config.')
  parser.add_option(
      '--switch-to-account',
      help='If given, switches LUCI_CONTEXT to given logical service account '
      '(e.g. "task" or "system") before launching the isolated process.')
  parser.add_option(
      '--output',
      action='append',
      help='Specifies an output to return. If no outputs are specified, all '
      'files located in ${ISOLATED_OUTDIR} will be returned; '
      'otherwise, outputs in both ${ISOLATED_OUTDIR} and those '
      'specified by --output option (there can be multiple) will be '
      'returned. Note that if a file in ${ISOLATED_OUTDIR} has the same '
      'path as an --output option, the --output version will be '
      'returned.')
  parser.add_option(
      '-a',
      '--argsfile',
      # This is actually handled in parse_args; it's included here purely so it
      # can make it into the help text.
      help='Specify a file containing a JSON array of arguments to this '
      'script. If --argsfile is provided, no other argument may be '
      'provided on the command line.')
  parser.add_option(
      '--report-on-exception',
      action='store_true',
      help='Whether report exception during execution to isolate server. '
      'This flag should only be used in swarming bot.')

  group = optparse.OptionGroup(parser, 'Data source - Isolate server')
  # Deprecated. Isolate server is being migrated to RBE-CAS.
  # Remove --isolated and isolate server options after migration.
  group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(group)
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser,
                               'Data source - Content Addressed Storage')
  group.add_option(
      '--cas-instance', help='Full CAS instance name for input/output files.')
  group.add_option(
      '--cas-digest',
      help='Digest of the input root on RBE-CAS. The format is '
      '`{hash}/{size_bytes}`.')
  parser.add_option_group(group)

  # Cache options.
  isolateserver.add_cache_options(parser)
  add_cas_cache_options(parser)

  cipd.add_cipd_options(parser)

  group = optparse.OptionGroup(parser, 'Named caches')
  group.add_option(
      '--named-cache',
      dest='named_caches',
      action='append',
      nargs=3,
      default=[],
      help='A named cache to request. Accepts 3 arguments: name, path, hint. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be put to. '
      'hint is an integer, the expected size of the cache; values <= 0 '
      'are ignored. '
      'This option can be specified more than once.')
  group.add_option(
      '--named-cache-root',
      default='named_caches',
      help='Cache root directory. Default=%default')
  parser.add_option_group(group)

  # These options must be added to |group|, not |parser|, otherwise the
  # 'Process containment' section of the help text is registered empty.
  group = optparse.OptionGroup(parser, 'Process containment')
  group.add_option(
      '--lower-priority', action='store_true',
      help='Lowers the child process priority')
  group.add_option(
      '--containment-type',
      choices=('NONE', 'AUTO', 'JOB_OBJECT'),
      default='NONE',
      help='Type of container to use')
  group.add_option(
      '--limit-processes',
      type='int',
      default=0,
      help='Maximum number of active processes in the containment')
  group.add_option(
      '--limit-total-committed-memory',
      type='int',
      default=0,
      help='Maximum sum of committed memory in the containment')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Debugging')
  group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
      'Default: %default')
  group.add_option('--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(group)

  auth.add_auth_options(parser)

  parser.set_defaults(cache='cache')
  return parser
1568
1569
def add_cas_cache_options(parser):
  """Registers the 'CAS cache management' option group on |parser|.

  The group declares --cas-cache (default 'cas-cache') and --kvs-dir
  (default '').
  """
  cas_group = optparse.OptionGroup(parser, 'CAS cache management')
  option_specs = (
      (
          '--cas-cache',
          {
              'metavar': 'DIR',
              'default': 'cas-cache',
              'help': 'Directory to keep a local cache of the files. '
                      'Accelerates download by reusing already downloaded '
                      'files. Default=%default',
          },
      ),
      (
          '--kvs-dir',
          {
              'default': '',
              'help': 'CAS cache dir using kvs for small files. '
                      'Default=%default',
          },
      ),
  )
  for flag, settings in option_specs:
    cas_group.add_option(flag, **settings)
  parser.add_option_group(cas_group)
1583
1584
def process_cas_cache_options(options):
  """Creates the content addressed cache selected by the CAS cache options.

  Returns:
    A local_caching.DiskContentAddressedCache rooted at the absolute path of
    --cas-cache (created with trim=False), or a
    local_caching.MemoryContentAddressedCache when --cas-cache is empty.
  """
  if not options.cas_cache:
    return local_caching.MemoryContentAddressedCache()

  cache_policies = local_caching.CachePolicies(
      max_cache_size=options.max_cache_size,
      min_free_space=options.min_free_space,
      # max_items isn't used for CAS cache for now.
      max_items=None,
      max_age_secs=MAX_AGE_SECS)
  cache_dir = six.text_type(os.path.abspath(options.cas_cache))
  return local_caching.DiskContentAddressedCache(
      cache_dir, cache_policies, trim=False)
1597
1598
def process_named_cache_options(parser, options, time_fn=None):
  """Validates the named cache options and builds the matching cache.

  Every --named-cache entry is checked (name against CACHE_NAME_RE, non-empty
  path, numeric hint); failures are reported through parser.error().

  Returns:
    A local_caching.NamedCache rooted at --named-cache-root, with the requested
    caches already touched, or None when --named-cache-root is empty.
  """
  if options.named_caches and not options.named_cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')

  for cache_name, cache_path, cache_hint in options.named_caches:
    if CACHE_NAME_RE.match(cache_name) is None:
      parser.error(
          'cache name %r does not match %r' %
          (cache_name, CACHE_NAME_RE.pattern))
    if not cache_path:
      parser.error('cache path cannot be empty')
    try:
      int(cache_hint)
    except ValueError:
      parser.error('cache hint must be a number')

  if not options.named_cache_root:
    return None

  # Make these configurable later if there is use case but for now it's fairly
  # safe values.
  # In practice, a fair chunk of bots are already recycled on a daily schedule
  # so this code doesn't have any effect to them, unless they are preloaded
  # with a really old cache.
  one_tib = 1024*1024*1024*1024
  policies = local_caching.CachePolicies(
      max_cache_size=one_tib,
      min_free_space=options.min_free_space,
      max_items=50,
      max_age_secs=MAX_AGE_SECS)
  root_dir = six.text_type(os.path.abspath(options.named_cache_root))
  named_cache = local_caching.NamedCache(root_dir, policies, time_fn=time_fn)
  # Touch any named caches we're going to use to minimize thrashing
  # between tasks that request some (but not all) of the same named caches.
  requested_names = [entry[0] for entry in options.named_caches]
  named_cache.touch(*requested_names)
  return named_cache
1632
1633
def parse_args(args):
  """Parses the command line, expanding --argsfile when it is used.

  When -a/--argsfile is given it must be the only argument; the actual
  arguments are then loaded from that file as JSON.

  Args:
    args: list of command line arguments.

  Returns:
    Tuple (parser, options, args): the full option parser, the parsed options
    and the remaining positional arguments.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser
  # even though that parser will never actually get to parse it. This is
  # because --argsfile is exclusive with all other options and arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      # error() prints the message and exits.
      file_argparse.error('Can\'t specify --argsfile with '
                          'any other arguments (%s)' % nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        args = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # We don't need to error out here: "args" is left untouched (it only
      # holds the --argsfile option), so the normal parser below still runs.
      print('Couldn\'t read arguments: %s' % e, file=sys.stderr)

  # Even if we failed to read the args, just call the normal parser now since it
  # will print the correct help message.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  # --cipd-enabled may come back as a string; normalize it to a truth value.
  if not isinstance(options.cipd_enabled, (bool, int)):
    options.cipd_enabled = distutils.util.strtobool(options.cipd_enabled)
  return (parser, options, args)
1662
1663
def _calc_named_cache_hint(named_cache, named_caches):
  """Returns the expected size of the missing named caches.

  Args:
    named_cache: object exposing the names of the caches already present as
        its 'available' attribute, or None when there is no named cache.
    named_caches: list of (name, path, hint) as given via --named-cache; hint
        must be convertible with int().

  Returns:
    Sum of the strictly positive hints of the requested caches whose name is
    not available yet; 0 if nothing is missing.
  """
  # process_named_cache_options() returns None when --named-cache-root is
  # empty; nothing can be present in that case.
  present = named_cache.available if named_cache is not None else ()
  size = 0
  for name, _, hint in named_caches:
    if name in present:
      continue
    hint = int(hint)
    # Non-positive hints mean no reservation is requested.
    if hint > 0:
      size += hint
  return size
1674
1675
def _clean_cmd(parser, options, caches, root):
  """Cleans up cache dirs/files for the --clean mode.

  Rejects options that make no sense with --clean through parser.error(),
  removes the kvs directory when it grew past _CAS_KVS_CACHE_THRESHOLD, then
  trims and cleans up every cache.

  Args:
    parser: option parser, used to report incompatible options.
    options: parsed command line options.
    caches: list of cache objects to trim and clean up.
    root: directory whose free space is logged and that is passed to
        local_caching.trim_caches().
  """
  if options.isolated:
    parser.error('Can\'t use --isolated with --clean.')
  if options.isolate_server:
    parser.error('Can\'t use --isolate-server with --clean.')
  if options.json:
    parser.error('Can\'t use --json with --clean.')
  if options.named_caches:
    parser.error('Can\'t use --named-cache with --clean.')
  if options.cas_instance or options.cas_digest:
    parser.error('Can\'t use --cas-instance, --cas-digest with --clean.')

  logging.info("initial free space: %d", file_path.get_free_space(root))

  if options.kvs_dir:
    kvs_dir = six.text_type(options.kvs_dir)
    if fs.isdir(kvs_dir):
      # Remove kvs file if its size exceeds fixed threshold.
      size = file_path.get_recursive_size(kvs_dir)
      if size >= _CAS_KVS_CACHE_THRESHOLD:
        logging.info("remove kvs dir with size: %d", size)
        file_path.rmtree(kvs_dir)

  # Trim first, then clean.
  local_caching.trim_caches(
      caches,
      root,
      min_free_space=options.min_free_space,
      max_age_secs=MAX_AGE_SECS)
  logging.info("free space after trim: %d", file_path.get_free_space(root))
  for c in caches:
    c.cleanup()
  logging.info("free space after cleanup: %d", file_path.get_free_space(root))
1709
1710
aludwin7556e0c2016-10-26 08:46:10 -07001711def main(args):
Marc-Antoine Ruelee6ca622017-11-29 11:19:16 -05001712 # Warning: when --argsfile is used, the strings are unicode instances, when
1713 # parsed normally, the strings are str instances.
aludwin7556e0c2016-10-26 08:46:10 -07001714 (parser, options, args) = parse_args(args)
maruel36a963d2016-04-08 17:15:49 -07001715
Takuto Ikuta74682862021-02-03 04:49:12 +00001716 SWARMING_SERVER = 'SWARMING_SERVER'
1717 if options.report_on_exception and SWARMING_SERVER in os.environ:
1718 on_error.report_on_exception_exit(os.environ[SWARMING_SERVER])
Takuto Ikutad4be2f12020-05-12 02:15:25 +00001719
Marc-Antoine Ruel5028ba22017-08-25 17:37:51 -04001720 if not file_path.enable_symlink():
Marc-Antoine Ruel5a024272019-01-15 20:11:16 +00001721 logging.warning('Symlink support is not enabled')
Marc-Antoine Ruel5028ba22017-08-25 17:37:51 -04001722
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001723 named_cache = process_named_cache_options(parser, options)
Marc-Antoine Ruel0d8b0f62018-09-10 14:40:35 +00001724 # hint is 0 if there's no named cache.
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001725 hint = _calc_named_cache_hint(named_cache, options.named_caches)
1726 if hint:
1727 # Increase the --min-free-space value by the hint, and recreate the
1728 # NamedCache instance so it gets the updated CachePolicy.
1729 options.min_free_space += hint
1730 named_cache = process_named_cache_options(parser, options)
1731
Takuto Ikuta5c59a842020-01-24 03:05:24 +00001732 # TODO(crbug.com/932396): Remove this.
Takuto Ikuta4a22c2c2020-06-05 02:02:23 +00001733 use_go_isolated = options.cipd_enabled
Takuto Ikuta5c59a842020-01-24 03:05:24 +00001734
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001735 # TODO(maruel): CIPD caches should be defined at an higher level here too, so
1736 # they can be cleaned the same way.
Takuto Ikutaf1c58442020-10-20 09:03:27 +00001737
1738 isolate_cache = isolateserver.process_cache_options(options, trim=False)
1739 cas_cache = process_cas_cache_options(options)
Takuto Ikuta00cf8fc2020-01-14 01:36:00 +00001740
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001741 caches = []
1742 if isolate_cache:
1743 caches.append(isolate_cache)
Junji Watanabeb03450b2020-09-25 05:09:27 +00001744 if cas_cache:
1745 caches.append(cas_cache)
Marc-Antoine Ruel7139d912018-06-15 20:04:42 +00001746 if named_cache:
1747 caches.append(named_cache)
Takuto Ikuta6e2ff962019-10-29 12:35:27 +00001748 root = caches[0].cache_dir if caches else six.text_type(os.getcwd())
maruel36a963d2016-04-08 17:15:49 -07001749 if options.clean:
Takuto Ikutaae391c52020-12-03 08:43:45 +00001750 _clean_cmd(parser, options, caches, root)
maruel36a963d2016-04-08 17:15:49 -07001751 return 0
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001752
1753 # Trim must still be done for the following case:
1754 # - named-cache was used
1755 # - some entries, with a large hint, where missing
1756 # - --min-free-space was increased accordingly, thus trimming is needed
1757 # Otherwise, this will have no effect, as bot_main calls run_isolated with
1758 # --clean after each task.
Takuto Ikutac9ddff22021-02-18 07:58:39 +00001759 additional_buffer = _FREE_SPACE_BUFFER_FOR_CIPD_PACKAGES
Takuto Ikuta91cb5ca2021-03-17 07:19:30 +00001760 if options.kvs_dir:
Takuto Ikuta7f45c592021-02-09 05:57:05 +00001761 additional_buffer += _CAS_KVS_CACHE_THRESHOLD
Takuto Ikutaa010c532020-10-21 05:42:29 +00001762 local_caching.trim_caches(
1763 caches,
1764 root,
Takuto Ikutac9ddff22021-02-18 07:58:39 +00001765 # Add some buffer for Go CLI.
Takuto Ikuta7f45c592021-02-09 05:57:05 +00001766 min_free_space=options.min_free_space + additional_buffer,
Takuto Ikutaa010c532020-10-21 05:42:29 +00001767 max_age_secs=MAX_AGE_SECS)
maruel36a963d2016-04-08 17:15:49 -07001768
Takuto Ikutaf1c58442020-10-20 09:03:27 +00001769 # Save state of isolate/cas cache not to overwrite state from go client.
1770 if use_go_isolated:
1771 isolate_cache.save()
1772 isolate_cache = None
1773 if cas_cache:
1774 cas_cache.save()
1775 cas_cache = None
1776
nodir55be77b2016-05-03 09:39:57 -07001777 if not options.isolated and not args:
1778 parser.error('--isolated or command to run is required.')
1779
Vadim Shtayura5d1efce2014-02-04 10:55:43 -08001780 auth.process_auth_options(parser, options)
nodir55be77b2016-05-03 09:39:57 -07001781
Takuto Ikutaae767b32020-05-11 01:22:19 +00001782 isolateserver.process_isolate_server_options(parser, options, False)
Junji Watanabeed9ce352020-09-25 12:32:07 +00001783 if ISOLATED_OUTDIR_PARAMETER in args and (not options.isolate_server and
1784 not options.cas_instance):
1785 parser.error('%s in args requires --isolate-server or --cas-instance' %
1786 ISOLATED_OUTDIR_PARAMETER)
1787
1788 if options.isolated and not options.isolate_server:
1789 parser.error('--isolated requires --isolate-server')
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001790
nodir90bc8dc2016-06-15 13:35:21 -07001791 if options.root_dir:
Takuto Ikuta6e2ff962019-10-29 12:35:27 +00001792 options.root_dir = six.text_type(os.path.abspath(options.root_dir))
Takuto Ikutad46ea762020-10-07 05:43:22 +00001793 else:
1794 options.root_dir = six.text_type(tempfile.mkdtemp(prefix='root'))
maruel12e30012015-10-09 11:55:35 -07001795 if options.json:
Takuto Ikuta6e2ff962019-10-29 12:35:27 +00001796 options.json = six.text_type(os.path.abspath(options.json))
nodir55be77b2016-05-03 09:39:57 -07001797
Marc-Antoine Ruel19dd8872017-11-28 18:33:39 -05001798 if any('=' not in i for i in options.env):
1799 parser.error(
1800 '--env required key=value form. value can be skipped to delete '
1801 'the variable')
Marc-Antoine Ruel7a68f712017-12-01 18:45:18 -05001802 options.env = dict(i.split('=', 1) for i in options.env)
Marc-Antoine Ruel19dd8872017-11-28 18:33:39 -05001803
1804 prefixes = {}
1805 cwd = os.path.realpath(os.getcwd())
1806 for item in options.env_prefix:
1807 if '=' not in item:
1808 parser.error(
1809 '--env-prefix %r is malformed, must be in the form `VAR=./path`'
1810 % item)
Marc-Antoine Ruel7a68f712017-12-01 18:45:18 -05001811 key, opath = item.split('=', 1)
Marc-Antoine Ruel19dd8872017-11-28 18:33:39 -05001812 if os.path.isabs(opath):
1813 parser.error('--env-prefix %r path is bad, must be relative.' % opath)
1814 opath = os.path.normpath(opath)
1815 if not os.path.realpath(os.path.join(cwd, opath)).startswith(cwd):
1816 parser.error(
Junji Watanabe38b28b02020-04-23 10:23:30 +00001817 '--env-prefix %r path is bad, must be relative and not contain `..`.'
1818 % opath)
Marc-Antoine Ruel19dd8872017-11-28 18:33:39 -05001819 prefixes.setdefault(key, []).append(opath)
1820 options.env_prefix = prefixes
Robert Iannuccibf5f84c2017-11-22 12:56:50 -08001821
nodirbe642ff2016-06-09 15:51:51 -07001822 cipd.validate_cipd_options(parser, options)
1823
vadimsh232f5a82017-01-20 19:23:44 -08001824 install_packages_fn = noop_install_packages
Ye Kuang1d096cb2020-06-26 08:38:21 +00001825 tmp_cipd_cache_dir = None
vadimsh902948e2017-01-20 15:57:32 -08001826 if options.cipd_enabled:
Ye Kuang1d096cb2020-06-26 08:38:21 +00001827 cache_dir = options.cipd_cache
1828 if not cache_dir:
1829 tmp_cipd_cache_dir = six.text_type(tempfile.mkdtemp())
1830 cache_dir = tmp_cipd_cache_dir
Takuto Ikutab7ce0e32019-11-27 23:26:18 +00001831 install_packages_fn = (
Junji Watanabe4b890ef2020-09-16 01:43:27 +00001832 lambda run_dir, isolated_dir, cas_dir: install_client_and_packages(
Ye Kuang1d096cb2020-06-26 08:38:21 +00001833 run_dir,
1834 cipd.parse_package_args(options.cipd_packages),
1835 options.cipd_server,
1836 options.cipd_client_package,
1837 options.cipd_client_version,
1838 cache_dir=cache_dir,
Junji Watanabe4b890ef2020-09-16 01:43:27 +00001839 isolated_dir=isolated_dir,
1840 cas_dir=cas_dir,
1841 ))
nodirbe642ff2016-06-09 15:51:51 -07001842
@contextlib.contextmanager
def install_named_caches(run_dir):
  """Installs the requested named caches under run_dir for a 'with' block.

  Each entry of options.named_caches is unpacked as (name, relpath, _); the
  cache |name| is installed at os.path.join(run_dir, relpath) before the body
  of the 'with' statement runs. When the body exits, normally or through an
  exception, the caches are uninstalled in reverse installation order.

  Arguments:
    run_dir: absolute path of the directory the cache paths are relative to.

  A failure to uninstall one cache is logged and does not prevent the
  remaining caches from being uninstalled.
  """
  # WARNING: this function depends on the "options" and "named_cache"
  # variables defined in the outer function.
  assert six.text_type(run_dir), repr(run_dir)
  assert os.path.isabs(run_dir), run_dir
  named_caches = [(os.path.join(run_dir, six.text_type(relpath)), name)
                  for name, relpath, _ in options.named_caches]
  for path, name in named_caches:
    named_cache.install(path, name)
  try:
    yield
  finally:
    # Uninstall each named cache, returning it to the cache pool. If an
    # uninstall fails for a given cache, it will remain in the task's
    # temporary space, get cleaned up by the Swarming bot, and be lost.
    #
    # If the Swarming bot cannot clean up the cache, it will handle it like
    # any other bot file that could not be removed.
    for path, name in reversed(named_caches):
      try:
        # uninstall() doesn't trim but does call save() implicitly. Trimming
        # *must* be done manually via periodic 'run_isolated.py --clean'.
        named_cache.uninstall(path, name)
      except local_caching.NamedCacheError:
        # Log the uninstall failure first, while it is still the exception
        # being handled; on Python 2 a nested except clause below would
        # otherwise replace the traceback reported by logging.exception().
        # The arguments follow the order of the message: name, then path.
        logging.exception('Error while removing named cache %r at %r. '
                          'The cache will be lost.', name, path)
        if sys.platform == 'win32':
          # Show running processes. This is only a debugging aid, so a
          # failure to run it must not abort the cleanup of the other caches
          # nor hide the exception raised by the task itself.
          sys.stderr.write("running process\n")
          try:
            subprocess42.check_call(['tasklist.exe', '/V'], stdout=sys.stderr)
          except (OSError, subprocess42.CalledProcessError):
            logging.exception('Failed to list running processes')
nodirf33b8d62016-10-26 22:34:58 -07001875
Takuto Ikutaf3caa9b2020-11-02 05:38:26 +00001876 command = args
1877 if options.relative_cwd:
1878 a = os.path.normpath(os.path.abspath(options.relative_cwd))
1879 if not a.startswith(os.getcwd()):
1880 parser.error(
1881 '--relative-cwd must not try to escape the working directory')
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001882
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001883 containment_type = subprocess42.Containment.NONE
1884 if options.containment_type == 'AUTO':
1885 containment_type = subprocess42.Containment.AUTO
1886 if options.containment_type == 'JOB_OBJECT':
1887 containment_type = subprocess42.Containment.JOB_OBJECT
1888 containment = subprocess42.Containment(
1889 containment_type=containment_type,
1890 limit_processes=options.limit_processes,
1891 limit_total_committed_memory=options.limit_total_committed_memory)
1892
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001893 data = TaskData(
1894 command=command,
Marc-Antoine Ruel95068cf2017-12-07 21:35:05 -05001895 relative_cwd=options.relative_cwd,
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001896 isolated_hash=options.isolated,
1897 storage=None,
1898 isolate_cache=isolate_cache,
Junji Watanabe54925c32020-09-08 00:56:18 +00001899 cas_instance=options.cas_instance,
1900 cas_digest=options.cas_digest,
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001901 outputs=options.output,
1902 install_named_caches=install_named_caches,
1903 leak_temp_dir=options.leak_temp_dir,
1904 root_dir=_to_unicode(options.root_dir),
1905 hard_timeout=options.hard_timeout,
1906 grace_period=options.grace_period,
1907 bot_file=options.bot_file,
1908 switch_to_account=options.switch_to_account,
1909 install_packages_fn=install_packages_fn,
Takuto Ikuta5c59a842020-01-24 03:05:24 +00001910 use_go_isolated=use_go_isolated,
Takuto Ikuta10cae642020-01-08 08:12:07 +00001911 go_cache_dir=options.cache,
Takuto Ikuta879788c2020-01-10 08:00:26 +00001912 go_cache_policies=local_caching.CachePolicies(
1913 max_cache_size=options.max_cache_size,
1914 min_free_space=options.min_free_space,
1915 max_items=options.max_items,
1916 max_age_secs=None,
1917 ),
Junji Watanabeb03450b2020-09-25 05:09:27 +00001918 cas_cache_dir=options.cas_cache,
1919 cas_cache_policies=local_caching.CachePolicies(
1920 max_cache_size=options.max_cache_size,
1921 min_free_space=options.min_free_space,
1922 max_items=None,
1923 max_age_secs=None,
1924 ),
Takuto Ikuta91cb5ca2021-03-17 07:19:30 +00001925 cas_kvs=options.kvs_dir,
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001926 env=options.env,
Marc-Antoine Ruel03c6fd12019-04-30 12:12:55 +00001927 env_prefix=options.env_prefix,
Marc-Antoine Ruel1b65f4e2019-05-02 21:56:58 +00001928 lower_priority=bool(options.lower_priority),
1929 containment=containment)
nodirbe642ff2016-06-09 15:51:51 -07001930 try:
nodir90bc8dc2016-06-15 13:35:21 -07001931 if options.isolate_server:
Marc-Antoine Ruelb8513132018-11-20 19:48:53 +00001932 server_ref = isolate_storage.ServerRef(
nodir90bc8dc2016-06-15 13:35:21 -07001933 options.isolate_server, options.namespace)
Marc-Antoine Ruelb8513132018-11-20 19:48:53 +00001934 storage = isolateserver.get_storage(server_ref)
nodir90bc8dc2016-06-15 13:35:21 -07001935 with storage:
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001936 data = data._replace(storage=storage)
nodirf33b8d62016-10-26 22:34:58 -07001937 # Hashing schemes used by |storage| and |isolate_cache| MUST match.
Marc-Antoine Ruelb8513132018-11-20 19:48:53 +00001938 assert storage.server_ref.hash_algo == server_ref.hash_algo
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -05001939 return run_tha_test(data, options.json)
1940 return run_tha_test(data, options.json)
Junji Watanabe38b28b02020-04-23 10:23:30 +00001941 except (cipd.Error, local_caching.NamedCacheError,
1942 local_caching.NoMoreSpace) as ex:
Marc-Antoine Ruelf899c482019-10-10 23:32:06 +00001943 print(ex.message, file=sys.stderr)
nodirbe642ff2016-06-09 15:51:51 -07001944 return 1
Ye Kuang1d096cb2020-06-26 08:38:21 +00001945 finally:
1946 if tmp_cipd_cache_dir is not None:
1947 try:
1948 file_path.rmtree(tmp_cipd_cache_dir)
1949 except OSError:
1950 logging.exception('Remove tmp_cipd_cache_dir=%s failed',
1951 tmp_cipd_cache_dir)
1952 # Best effort clean up. Failed to do so doesn't affect the outcome.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001953
1954
if __name__ == '__main__':
  # Process-wide setup that has to happen before main() does any work.
  subprocess42.inhibit_os_error_reporting()
  # From here on, make sure we always run with the correct encoding.
  fix_encoding.fix_encoding()
  # Identify this tool and its version in the user agent set on |net|.
  user_agent = 'run_isolated.py/' + __version__
  net.set_user_agent(user_agent)
  # The value returned by main() is used as the process exit code.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -05001960 sys.exit(main(sys.argv[1:]))