blob: 38af4e5cacfa7e49efdadf426839327bdf06c781 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +00008run_isolated takes care of setting up a temporary environment, running a
9command, and tearing it down.
nodir55be77b2016-05-03 09:39:57 -070010
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000011It handles downloading and uploading isolated files, mapping CIPD packages and
12reusing stateful named caches.
13
14The isolated files, CIPD packages and named caches are kept as a global LRU
15cache.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050016
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000017Any ${EXECUTABLE_SUFFIX} on the command line or the environment variables passed
18with the --env option will be replaced with ".exe" string on Windows and "" on
19other platforms.
nodirbe642ff2016-06-09 15:51:51 -070020
Roberto Carrillo71ade6d2018-10-08 22:30:24 +000021Any ${ISOLATED_OUTDIR} on the command line or the environment variables passed
22with the --env option will be replaced by the location of a temporary directory
23upon execution of the command specified in the .isolated file. All content
24written to this directory will be uploaded upon termination and the .isolated
25file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070026
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000027Any ${SWARMING_BOT_FILE} on the command line or the environment variables passed
28with the --env option will be replaced by the value of the --bot-file parameter.
29This file is used by a swarming bot to communicate state of the host to tasks.
30It is written to by the swarming bot's on_before_task() hook in the swarming
31server's custom bot_config.py.
32
Joanna Wang4cec0e42021-08-26 00:48:37 +000033Any ${SWARMING_TASK_ID} on the command line will be replaced by the
34SWARMING_TASK_ID value passed with the --env option.
35
Marc-Antoine Rueleed2f3a2019-03-14 00:00:40 +000036See
37https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Magic-Values.md
38for all the variables.
39
40See
41https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/swarming_bot/config/bot_config.py
42for more information about bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000043"""
44
Marc-Antoine Ruelf899c482019-10-10 23:32:06 +000045from __future__ import print_function
46
47__version__ = '1.0.1'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000048
aludwin7556e0c2016-10-26 08:46:10 -070049import argparse
maruel064c0a32016-04-05 11:47:15 -070050import base64
iannucci96fcccc2016-08-30 15:52:22 -070051import collections
vadimsh232f5a82017-01-20 19:23:44 -080052import contextlib
Ye Kuangfff1e502020-07-13 13:21:57 +000053import distutils
Sadaf Matinkhoo10743a62018-03-29 16:28:58 -040054import errno
aludwin7556e0c2016-10-26 08:46:10 -070055import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000056import logging
57import optparse
58import os
Takuto Ikuta5c59a842020-01-24 03:05:24 +000059import platform
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -040060import re
Junji Watanabedc2f89e2021-11-08 08:44:30 +000061import shutil
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000062import sys
63import tempfile
maruel064c0a32016-04-05 11:47:15 -070064import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000065
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000066from utils import tools
67tools.force_local_third_party()
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000068
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000069# third_party/
70from depot_tools import fix_encoding
71
72# pylint: disable=ungrouped-imports
Takuto Ikutad53d7bd2021-07-16 03:09:33 +000073import DEPS
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000074import auth
75import cipd
Marc-Antoine Ruel016c7602019-04-02 18:31:13 +000076import local_caching
77from libs import luci_context
Vadim Shtayura6b555c12014-07-23 16:22:18 -070078from utils import file_path
maruel12e30012015-10-09 11:55:35 -070079from utils import fs
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040080from utils import logging_utils
Ye Kuang2dd17442020-04-22 08:45:52 +000081from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040082from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050083from utils import subprocess42
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000084
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000085
# Magic variables that can be found in the isolate task command line.
# replace_parameters() substitutes each of these tokens when it appears in the
# string it is given.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'
SWARMING_TASK_ID_PARAMETER = '${SWARMING_TASK_ID}'


# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'
95
maruele2f2cb82016-07-13 14:41:03 -070096
# Use short names for temporary directories. This is driven by Windows, which
# imposes a relatively short maximum path length of 260 characters, often
# referred to as MAX_PATH. It is relatively easy to create files with longer
# path length. A use case is with recursive dependency trees like npm packages.
#
# It is recommended to start the script with a `root_dir` as short as
# possible.
# - ir stands for isolated_run
# - io stands for isolated_out
# - it stands for isolated_tmp
# - ic stands for isolated_client
# - cc stands for cas_client (going by the name _CAS_CLIENT_DIR)
# - ns stands for nsjail
ISOLATED_RUN_DIR = 'ir'
ISOLATED_OUT_DIR = 'io'
ISOLATED_TMP_DIR = 'it'
ISOLATED_CLIENT_DIR = 'ic'
_CAS_CLIENT_DIR = 'cc'
_NSJAIL_DIR = 'ns'
maruele2f2cb82016-07-13 14:41:03 -0700115
# TODO(tikuta): take these parameters from luci-config?
# CIPD package names; the versions are read from the first package entry of
# the matching dependency declared in the DEPS module.
_CAS_PACKAGE = 'infra/tools/luci/cas/${platform}'
_LUCI_GO_REVISION = DEPS.deps['luci-go']['packages'][0]['version']
_NSJAIL_PACKAGE = 'infra/3pp/tools/nsjail/${platform}'
_NSJAIL_VERSION = DEPS.deps['nsjail']['packages'][0]['version']

# Keep synced with task_request.py
# Matches 1 to 4096 characters taken from lowercase ASCII letters, digits and
# underscore.
CACHE_NAME_RE = re.compile(r'^[a-z0-9_]{1,4096}$')

# 2 GiB, expressed in bytes.
_FREE_SPACE_BUFFER_FOR_CIPD_PACKAGES = 2 * 1024 * 1024 * 1024
Marc-Antoine Ruel8b11dbd2018-05-18 14:31:22 -0400126
# Message template with two '%s' placeholders, in order: the directory that
# could not be deleted, and the number of seconds a process has to comply after
# the termination signal.
OUTLIVING_ZOMBIE_MSG = """\
*** Swarming tried multiple times to delete the %s directory and failed ***
*** Hard failing the task ***

Swarming detected that your testing script ran an executable, which may have
started a child executable, and the main script returned early, leaving the
children executables playing around unguided.

You don't want to leave children processes outliving the task on the Swarming
bot, do you? The Swarming bot doesn't.

How to fix?
- For any process that starts children processes, make sure all children
 processes terminated properly before each parent process exits. This is
 especially important in very deep process trees.
 - This must be done properly both in normal successful task and in case of
 task failure. Cleanup is very important.
- The Swarming bot sends a SIGTERM in case of timeout.
 - You have %s seconds to comply after the signal was sent to the process
 before the process is forcibly killed.
- To achieve not leaking children processes in case of signals on timeout, you
 MUST handle signals in each executable / python script and propagate them to
 children processes.
 - When your test script (python or binary) receives a signal like SIGTERM or
 CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for
 them to terminate before quitting.

See
https://chromium.googlesource.com/infra/luci/luci-py.git/+/master/appengine/swarming/doc/Bot.md#Graceful-termination_aka-the-SIGTERM-and-SIGKILL-dance
for more information.

*** May the SIGKILL force be with you ***
"""
160
161
# Three weeks, expressed in seconds. Hardcoded for now; it could eventually be
# exposed as a flag once there's value in doing so.
MAX_AGE_SECS = 3 * 7 * 24 * 60 * 60

# 5 GiB, expressed in bytes.
_CAS_KVS_CACHE_THRESHOLD = 5 * 1024 ** 3
167
# Record type grouping the settings of a task run. Each field is described by
# the comment right above its name.
TaskData = collections.namedtuple(
    'TaskData',
    (
        # Command line to use, as a list of strings. It is independent of what
        # was specified in the isolated file.
        'command',
        # Directory, relative, in which the command is started.
        'relative_cwd',
        # Digest of the input root on RBE-CAS.
        'cas_digest',
        # Full name of the CAS instance.
        'cas_instance',
        # Paths, relative to root_dir, to put into the output isolated bundle
        # once the task completes (see link_outputs_to_outdir).
        'outputs',
        # Function (run_dir) => context manager installing the named caches
        # into |run_dir|.
        'install_named_caches',
        # When True, the temporary directory is deliberately leaked so it can
        # be examined later.
        'leak_temp_dir',
        # Directory under which the temporary directory is created. A random
        # temporary directory is created when it is not specified.
        'root_dir',
        # The process is killed if it lasts more than this many seconds.
        'hard_timeout',
        # Seconds to wait between SIGTERM and SIGKILL.
        'grace_period',
        # Path to a file with bot state; stands in for the ${SWARMING_BOT_FILE}
        # task command line argument.
        'bot_file',
        # Logical account LUCI_CONTEXT is switched into.
        'switch_to_account',
        # Context manager dir => CipdInfo, see install_client_and_packages.
        'install_packages_fn',
        # Cache directory for the `cas` client.
        'cas_cache_dir',
        # Parameters passed to the `cas` client.
        'cas_cache_policies',
        # Parameters for the kvs file used by the `cas` client.
        'cas_kvs',
        # Environment variables to set.
        'env',
        # Environment variables to mutate with relative directories.
        # Example: {"ENV_KEY": ['relative', 'paths', 'to', 'prepend']}
        'env_prefix',
        # Lowers the priority of the task process.
        'lower_priority',
        # A subprocess42.Containment instance. May be None.
        'containment',
        # Function trimming the caches before cipd packages are installed and
        # isolated files are downloaded.
        'trim_caches_fn',
    ))
Marc-Antoine Ruel7de52592017-12-07 10:41:12 -0500222
223
class NonRecoverableException(Exception):
  """For handling errors where we cannot recover from and should not retry.

  Attributes:
    status: the status value given at construction time.
  """

  def __init__(self, status, msg):
    """Stores |status| and uses |msg| as the exception message."""
    # super() must name this class, not Exception: naming Exception starts the
    # MRO lookup after Exception, which skips Exception.__init__ and the
    # __init__ of any cooperative base class mixed in by a subclass.
    super(NonRecoverableException, self).__init__(msg)
    self.status = status

  def to_dict(self):
    """Returns a dictionary with the attributes serialised.

    Must be overridden by subclasses; this base implementation always raises
    NotImplementedError.
    """
    raise NotImplementedError()
234
235
class NonRetriableCasException(NonRecoverableException):
  """Signals a bad CAS input, for which no retry should be attempted.

  Attributes:
    digest: the digest given at construction time.
    instance: the instance given at construction time.
  """

  def __init__(self, status, digest, instance):
    message = "CAS error: {} with digest {} on instance {}".format(
        status, digest, instance)
    super(NonRetriableCasException, self).__init__(status, message)
    self.digest = digest
    self.instance = instance

  def to_dict(self):
    """Returns the status, digest and instance as a dictionary."""
    return dict(
        status=self.status,
        digest=self.digest,
        instance=self.instance)
252
253
class NonRetriableCipdException(NonRecoverableException):
  """Signals a bad CIPD package, for which no retry should be attempted.

  Attributes:
    package_name: the package name given at construction time.
    path: the path given at construction time.
    version: the version given at construction time.
  """

  def __init__(self, status, package_name, path, version):
    # Note the message lists the version before the path.
    message = "CIPD error: {} with package {}, version {} on path {}".format(
        status, package_name, version, path)
    super(NonRetriableCipdException, self).__init__(status, message)
    self.package_name = package_name
    self.path = path
    self.version = version

  def to_dict(self):
    """Returns the status, package name, path and version as a dictionary."""
    return dict(
        status=self.status,
        package_name=self.package_name,
        path=self.path,
        version=self.version)
272
273
def make_temp_dir(prefix, root_dir):
  """Creates a fresh, uniquely named directory and returns its path.

  Args:
    prefix: string the name of the new directory starts with.
    root_dir: directory in which the new directory is created.
  """
  new_dir = tempfile.mkdtemp(dir=root_dir, prefix=prefix)
  return new_dir
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000277
278
@contextlib.contextmanager
def set_luci_context_account(account, tmp_dir):
  """Context manager making |account| the default LUCI_CONTEXT account.

  Nothing is changed, and the body of the `with` statement simply runs, when:
  - |account| is None or ''. This happens when run_isolated.py is called
    without the '--switch-to-account' flag, e.g. when users invoke it
    explicitly from their code. If run_isolated.py is running in some
    LUCI_CONTEXT environment, the task just inherits whatever account is
    already set.
  - LUCI_CONTEXT-based auth is not used, i.e. there is no 'local_auth'
    section.
  - |account| is already the default account.

  Otherwise a LUCI_CONTEXT with an updated 'local_auth' section is written for
  the duration of the `with` body. If |account| is not defined in the context,
  the default account is removed, which switches to non-authenticated access.
  This happens for Swarming tasks that don't use 'task' service accounts.

  Args:
    account: logical account to switch to; may be None or ''.
    tmp_dir: passed as |_tmpdir| to luci_context.write().
  """
  # Only look at the current context when a switch is actually requested.
  local_auth = luci_context.read('local_auth') if account else None

  # See LUCI_CONTEXT.md for the format of 'local_auth'.
  if not local_auth or local_auth.get('default_account_id') == account:
    yield
    return

  known_ids = set(entry['id'] for entry in local_auth.get('accounts') or [])
  if account not in known_ids:
    logging.warning(
        'Requested LUCI_CONTEXT account %r is not available (have only %r), '
        'disabling authentication', account, sorted(known_ids))
    local_auth.pop('default_account_id', None)
  else:
    logging.info('Switching default LUCI_CONTEXT account to %r', account)
    local_auth['default_account_id'] = account

  with luci_context.write(_tmpdir=tmp_dir, local_auth=local_auth):
    yield
325
326
def process_command(command, out_dir, bot_file):
  """Expands the magic parameters in every argument of a command line.

  Args:
    command: iterable of strings, the command line.
    out_dir: forwarded to replace_parameters().
    bot_file: forwarded to replace_parameters().

  Returns:
    A new list with replace_parameters() applied to each argument, in order.

  Raises:
    ValueError if a parameter is requested in |command| but its value is not
    provided.
  """
  return [
      replace_parameters(token, out_dir, bot_file) for token in command
  ]
335
336
def replace_parameters(arg, out_dir, bot_file):
  """Expands the magic ${...} tokens found in a single string.

  Args:
    arg: string to process.
    out_dir: value substituted for ${ISOLATED_OUTDIR}.
    bot_file: value substituted for ${SWARMING_BOT_FILE}. When it is falsy the
        token is left unchanged and a warning is logged.

  Returns:
    |arg| with the tokens replaced. When an output directory or a bot file was
    substituted, every '/' of the result is also converted to os.sep.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is requested in |arg| but |out_dir| is not
    provided.
  """
  expanded = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
  has_path_token = False

  if ISOLATED_OUTDIR_PARAMETER in expanded:
    if not out_dir:
      raise ValueError(
          'output directory is requested in command or env var, but not '
          'provided; please specify one')
    expanded = expanded.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
    has_path_token = True

  if SWARMING_BOT_FILE_PARAMETER in expanded:
    if not bot_file:
      logging.warning('SWARMING_BOT_FILE_PARAMETER found in command or env '
                      'var, but no bot_file specified. Leaving parameter '
                      'unchanged.')
    else:
      expanded = expanded.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
      has_path_token = True

  if SWARMING_TASK_ID_PARAMETER in expanded:
    # The task id is read from the environment of the current process.
    task_id = os.environ.get('SWARMING_TASK_ID')
    if task_id:
      expanded = expanded.replace(SWARMING_TASK_ID_PARAMETER, task_id)

  # Slashes are converted only when a path token was expanded, because of
  # arguments like '${ISOLATED_OUTDIR}/foo/bar'.
  if has_path_token:
    return expanded.replace('/', os.sep)
  return expanded
maruela9cfd6f2015-09-15 11:03:15 -0700370
371
def set_temp_dir(env, tmp_dir):
  """Points the temp directory variables of |env| at |tmp_dir|.

  Args:
    env: dict of environment variables; modified in place.
    tmp_dir: path to store in the temp directory variables.
  """
  # pylint: disable=line-too-long
  # Variables to set on top of $TMPDIR, keyed by sys.platform.
  extra_vars = {
      # * chromium's base utils and Go both use GetTempPath().
      #   https://cs.chromium.org/chromium/src/base/files/file_util_win.cc?q=GetTempPath
      # * GetTempPath() uses %TMP%, then %TEMP%, then other stuff. So %TMP%
      #   must be set.
      #   https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-gettemppathw
      #   https://blogs.msdn.microsoft.com/oldnewthing/20150417-00/?p=44213
      'win32': ('TMP', 'TEMP'),
      # * Chromium uses a hack on macOS before calling into
      #   NSTemporaryDirectory().
      #   https://cs.chromium.org/chromium/src/base/files/file_util_mac.mm?q=GetTempDir
      #   https://developer.apple.com/documentation/foundation/1409211-nstemporarydirectory
      'darwin': ('MAC_CHROMIUM_TMPDIR',),
  }

  # * python respects $TMPDIR, $TEMP, and $TMP in this order, regardless of
  #   platform. So $TMPDIR must be set on all platforms.
  #   https://github.com/python/cpython/blob/2.7/Lib/tempfile.py#L155
  # * TMPDIR is also the POSIX standard envvar for the temp directory, which
  #   is why nothing more is needed on the other platforms: mktemp on linux,
  #   Chromium on linux and Go all respect it.
  #   http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
  #   https://cs.chromium.org/chromium/src/base/files/file_util_posix.cc?q=GetTempDir
  #   https://go.googlesource.com/go/+/go1.10.3/src/os/file_unix.go#307
  env['TMPDIR'] = tmp_dir
  for name in extra_vars.get(sys.platform, ()):
    env[name] = tmp_dir
404
Roberto Carrillo71ade6d2018-10-08 22:30:24 +0000405
def get_command_env(tmp_dir, cipd_info, run_dir, env, env_prefixes, out_dir,
                    bot_file):
  """Builds the full OS environment a command is run in.

  Starts from a copy of os.environ, applies |env|, puts the directory holding
  the cipd binary in front of PATH, exposes the CIPD_CACHE_DIR env var,
  installs all |env_prefixes| and finally sets up the temp directory variables.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.
    run_dir: The root directory the isolated tree is mapped in.
    env: environment variables to use. A variable with a falsy value is removed
        from the result instead of being set.
    env_prefixes: {"ENV_KEY": ['cwd', 'relative', 'paths', 'to', 'prepend']}
    out_dir: Isolated output directory. Required to be != None if any of the
        env vars contain ISOLATED_OUTDIR_PARAMETER.
    bot_file: Required to be != None if any of the env vars contain
        SWARMING_BOT_FILE_PARAMETER.

  Returns:
    dict mapping variable names to values.
  """
  result = os.environ.copy()
  for name, value in env.items():
    if value:
      result[name] = replace_parameters(value, out_dir, bot_file)
    else:
      result.pop(name, None)

  if cipd_info:
    client_dir = os.path.dirname(cipd_info.client.binary_path)
    result['PATH'] = '%s%s%s' % (client_dir, os.pathsep, result['PATH'])
    result['CIPD_CACHE_DIR'] = cipd_info.cache_dir
    # The pins are dumped to a json file inside |tmp_dir| and the file's path
    # is exposed through an env var.
    pins_path = os.path.join(tmp_dir, 'cipd_info.json')
    with open(pins_path, 'w') as pins_file:
      json.dump(cipd_info.pins, pins_file)
    result['ISOLATED_RESOLVED_PACKAGE_VERSIONS_FILE'] = pins_path

  for name, rel_paths in env_prefixes.items():
    assert isinstance(rel_paths, list), rel_paths
    entries = [
        os.path.normpath(os.path.join(run_dir, rel)) for rel in rel_paths
    ]
    # A pre-existing non-empty value is kept, after the prepended paths.
    existing = result.get(name)
    if existing:
      entries.append(existing)
    result[name] = os.pathsep.join(entries)

  set_temp_dir(result, tmp_dir)
  return result
vadimsh232f5a82017-01-20 19:23:44 -0800450
451
def run_command(
    command, cwd, env, hard_timeout, grace_period, lower_priority, containment):
  """Runs the command.

  The command is started with subprocess42.Popen. If it does not exit within
  |hard_timeout| seconds, or if one of subprocess42.STOP_SIGNALS is received
  while waiting, the process is terminated and given |grace_period| seconds to
  exit before being killed.

  Args:
    command: command line, passed as is to subprocess42.Popen.
    cwd: working directory of the process.
    env: environment of the process.
    hard_timeout: seconds to wait for the process to exit on its own. A falsy
        value means waiting without a timeout.
    grace_period: seconds to wait after the process was terminated before
        killing it. A falsy value means waiting without a timeout.
    lower_priority: passed as is to subprocess42.Popen.
    containment: passed as is to subprocess42.Popen.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging_utils.user_logs('run_command(%s, %s, %s, %s, %s, %s)', command, cwd,
                          hard_timeout, grace_period, lower_priority,
                          containment)

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list so that handler() below can record the signal by mutating it.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows where things can go
      # wrong.
      def handler(signum, _frame):
        # Only the first signal received once the process was started is acted
        # upon. Raising TimeoutExpired routes it through the same termination
        # path as a hard timeout; |had_signal| tells the two apart.
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(
          command, cwd=cwd, env=env, detached=True, close_fds=True,
          lower_priority=lower_priority, containment=containment)
      logging.info('Subprocess for command started')
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
          logging.info("finished with exit code %d after hard_timeout %s",
                       exit_code, hard_timeout)
        except subprocess42.TimeoutExpired:
          # Reached either on an actual timeout or from handler() above.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      kill_sent = False
      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
            logging.info("finished with exit code %d after grace_period %s",
                         exit_code, grace_period)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - process exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
            kill_sent = True
        logging.info('Waiting for process exit')
        exit_code = proc.wait()

      # the process group / job object may be dangling so if we didn't kill
      # it already, give it a poke now.
      if not kill_sent:
        proc.kill()
    except OSError as e:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist, a dependent library is missing or '
          'the command line is too long>\n'
          '<Check for missing .so/.dll in the .isolate or GN file or length of '
          'command line args>\n'
          '<Command: %s>\n'
          '<Exception: %s>\n' % (command, e))
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  # The exit code is also logged masked to 32 bits, in hexadecimal.
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700541
542
def _run_go_cmd_and_wait(cmd, tmp_dir):
  """Runs an external Go command, `isolated` or `cas`, until it completes.

  While this is a generic function to launch a subprocess, it has logic that
  is specific to Go `isolated` and `cas` for waiting and logging.

  Args:
    cmd: command line, as a list of strings.
    tmp_dir: directory the temp directory variables of the subprocess point to.

  Returns:
    The subprocess object

  Raises:
    subprocess42.CalledProcessError if the command exits with a non-zero code.
    ValueError if the command is still running after the maximum timeout, in
    which case it is terminated first.
  """
  cmd_str = ' '.join(cmd)
  check_period_sec = 30
  max_checks = 100
  try:
    child_env = os.environ.copy()
    set_temp_dir(child_env, tmp_dir)
    proc = subprocess42.Popen(cmd, env=child_env)

    # max timeout = max_checks * check_period_sec = 50 minutes
    for attempt in range(1, max_checks + 1):
      # Waiting in short periods and printing in between is to prevent I/O
      # timeout error during setup.
      try:
        retcode = proc.wait(check_period_sec)
      except subprocess42.TimeoutExpired:
        elapsed = attempt * check_period_sec
        print('still running (after %d seconds)' % elapsed)
        continue
      if retcode != 0:
        raise subprocess42.CalledProcessError(retcode, cmd=cmd_str)
      return proc

    # Every check timed out: the max timeout was exceeded.
    proc.terminate()
    try:
      proc.wait(check_period_sec)
    except subprocess42.TimeoutExpired:
      logging.exception(
          "failed to terminate? timeout happened after %d seconds",
          check_period_sec)
      proc.kill()
      proc.wait()
    # Raise unconditionally, because |proc| was forcefully terminated.
    raise ValueError("timedout after %d seconds (cmd=%s)" %
                     (check_period_sec * max_checks, cmd_str))
  except Exception:
    logging.exception('Failed to run Go cmd %s', cmd_str)
    raise
Ye Kuangc0cf9ca2020-07-16 08:56:51 +0000592
593
def _fetch_and_map(cas_client, digest, instance, output_dir, cache_dir,
                   policies, kvs_dir, tmp_dir):
  """
  Fetches a CAS tree using cas client, create the tree and returns download
  stats.

  Runs `<cas_client> download` and reads the json file it dumps. If the command
  fails and |kvs_dir| is set, |kvs_dir| and |output_dir| are removed and the
  command is run a second time.

  Args:
    cas_client: first element of the command line, i.e. the cas client to run.
    digest: value of the -digest flag.
    instance: value of the -cas-instance flag. Ignored when the
        RUN_ISOLATED_CAS_ADDRESS environment variable is set.
    output_dir: value of the -dir flag.
    cache_dir: value of the -cache-dir flag.
    policies: object whose max_cache_size and min_free_space attributes are
        used for the -cache-max-size and -cache-min-free-space flags.
    kvs_dir: value of the -kvs-dir flag; the flag is omitted when falsy.
    tmp_dir: temp directory given to _run_go_cmd_and_wait().

  Returns:
    dict with the keys 'duration' (seconds spent in this function),
    'items_cold' and 'items_hot' (both taken from the dumped json).

  Raises:
    NonRetriableCasException if the 'result' field of the dumped json is
    'digest_invalid', 'authentication_error' or 'arguments_invalid'.
    subprocess42.CalledProcessError if the command fails and |kvs_dir| is not
    set.
    IOError, ValueError if the dumped json cannot be read.
  """

  start = time.time()
  # NOTE(review): unlike |profile_dir| below, the result file is not created
  # inside |tmp_dir| since no dir= is given.
  result_json_handle, result_json_path = tempfile.mkstemp(
      prefix='fetch-and-map-result-', suffix='.json')
  os.close(result_json_handle)
  # NOTE(review): |profile_dir| is only created here and removed in the finally
  # clause; nothing in this function passes it to the command.
  profile_dir = tempfile.mkdtemp(dir=tmp_dir)

  try:
    cmd = [
        cas_client,
        'download',
        '-digest',
        digest,
        # flags for cache.
        '-cache-dir',
        cache_dir,
        '-cache-max-size',
        str(policies.max_cache_size),
        '-cache-min-free-space',
        str(policies.min_free_space),
        # flags for output.
        '-dir',
        output_dir,
        '-dump-json',
        result_json_path,
        '-log-level',
        'info',
    ]

    # When RUN_ISOLATED_CAS_ADDRESS is set in test mode,
    # Use it and ignore CAS instance option.
    cas_addr = os.environ.get('RUN_ISOLATED_CAS_ADDRESS')
    if cas_addr:
      cmd.extend([
          '-cas-addr',
          cas_addr,
      ])
    else:
      cmd.extend([
          '-cas-instance',
          instance
      ])

    if kvs_dir:
      cmd.extend(['-kvs-dir', kvs_dir])

    def open_json_and_check(result_json_path, cleanup_dirs):
      """Loads the dumped json and raises if it reports a non-retriable error.

      When |cleanup_dirs| is true, |kvs_dir| and |output_dir| are removed,
      whether or not the json could be read.
      """
      cas_error = False
      result_json = {}
      try:
        with open(result_json_path) as json_file:
          result_json = json.load(json_file)
          cas_error = result_json.get('result') in ('digest_invalid',
                                                    'authentication_error',
                                                    'arguments_invalid')
      except (IOError, ValueError):
        logging.error('Failed to read json file: %s', result_json_path)
        raise
      finally:
        if cleanup_dirs:
          file_path.rmtree(kvs_dir)
          file_path.rmtree(output_dir)
      if cas_error:
        raise NonRetriableCasException(result_json['result'], digest, instance)
      return result_json

    try:
      _run_go_cmd_and_wait(cmd, tmp_dir)
    except subprocess42.CalledProcessError as ex:
      if not kvs_dir:
        # Without a kvs dir there is no retry: surface a non-retriable CAS
        # error if the json reports one, else re-raise the original failure.
        open_json_and_check(result_json_path, False)
        raise
      # Clean up (kvs_dir is known to be set here) and retry once, unless the
      # json reports a non-retriable CAS error.
      open_json_and_check(result_json_path, True)
      logging.exception('Failed to run cas, removing kvs cache dir and retry.')
      on_error.report("Failed to run cas %s" % ex)
      _run_go_cmd_and_wait(cmd, tmp_dir)

    result_json = open_json_and_check(result_json_path, False)

    return {
        'duration': time.time() - start,
        'items_cold': result_json['items_cold'],
        'items_hot': result_json['items_hot'],
    }
  finally:
    fs.remove(result_json_path)
    file_path.rmtree(profile_dir)
Junji Watanabe4b890ef2020-09-16 01:43:27 +0000687
688
def link_outputs_to_outdir(run_dir, out_dir, outputs):
  """Makes the named outputs of a task available under out_dir for upload.

  Arguments:
  - run_dir: directory the task ran in; each output is looked up relative to it.
  - out_dir: directory whose content gets uploaded.
  - outputs: relative paths of the files/directories to collect. When empty or
             None, nothing is done.

  The parent directories are created first via file_path.create_directories(),
  then each entry is copied (hardlinked when possible) by copy_recursively().
  """
  if outputs:
    file_path.create_directories(out_dir, outputs)
    for rel_path in outputs:
      source = os.path.join(run_dir, rel_path)
      destination = os.path.join(out_dir, rel_path)
      copy_recursively(source, destination)
699
700
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from src to dst.

  `src` may be a file, directory, or a symlink to a file or directory.
  All symlinks are replaced with their targets, so the resulting
  directory structure under dst will never have any symlinks.

  To increase speed, copy_recursively hardlinks individual files into the
  (newly created) directory structure if possible, unlike Python's
  shutil.copytree().

  Missing sources, broken symlinks and cyclic symlink chains are logged and
  skipped. Any OSError raised while copying is logged and swallowed, so this
  function is best-effort and never raises OSError to the caller.
  """
  orig_src = src
  try:
    # Replace symlinks with their final target. os.path.realpath() gives up on
    # a symlink loop and hands back a path that is still a link, so remember
    # what was already visited to avoid spinning forever on a cycle.
    visited = set()
    while fs.islink(src):
      if src in visited:
        logging.warning('Symlink cycle detected at %s while resolving %s', src,
                        orig_src)
        return
      visited.add(src)
      res = fs.readlink(src)
      src = os.path.realpath(os.path.join(os.path.dirname(src), res))

    if not fs.exists(src):
      logging.warning('Path %s does not exist or %s is a broken symlink', src,
                      orig_src)
      return

    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      return

    # src is a directory: mirror it, then recurse into every child.
    if not fs.exists(dst):
      os.makedirs(dst)

    for child in fs.listdir(src):
      copy_recursively(os.path.join(src, child), os.path.join(dst, child))

  except OSError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, orig_src)
    else:
      logging.info("Couldn't collect output file %s: %s", src, e)
aludwin0a8e17d2016-10-27 15:57:39 -0700741
742
def upload_outdir(cas_client, cas_instance, outdir, tmp_dir):
  """Archives the content of |outdir| to CAS, if there is any.

  Arguments:
  - cas_client: path to the cas client executable.
  - cas_instance: CAS instance to upload to; ignored when the
                  RUN_ISOLATED_CAS_ADDRESS environment variable is set.
  - outdir: directory to upload.
  - tmp_dir: passed through to _run_go_cmd_and_wait().

  Returns:
    tuple(root_digest, stats)
    - root_digest: dict describing the digest of the output directory.
    - stats: uploading stats, including the 'duration' of the upload.
    Both are None when |outdir| is empty.
  """
  if not fs.listdir(outdir):
    return None, None

  # The cas client reports its results through these two scratch files.
  digest_fd, digest_path = tempfile.mkstemp(prefix='cas-digest', suffix='.txt')
  os.close(digest_fd)
  stats_fd, stats_json_path = tempfile.mkstemp(
      prefix='upload-stats', suffix='.json')
  os.close(stats_fd)

  try:
    cmd = [cas_client, 'archive']
    # Format of -paths: <working directory>:<relative path to dir>
    cmd += ['-paths', outdir + ':']
    # Output files.
    cmd += ['-dump-digest', digest_path]
    cmd += ['-dump-json', stats_json_path]

    # When RUN_ISOLATED_CAS_ADDRESS is set in test mode, use it and ignore the
    # CAS instance option.
    cas_addr = os.environ.get('RUN_ISOLATED_CAS_ADDRESS')
    if cas_addr:
      cmd += ['-cas-addr', cas_addr]
    else:
      cmd += ['-cas-instance', cas_instance]

    if sys.platform == 'linux':
      # TODO(crbug.com/1243194): remove this after investigation.
      cmd += ['-log-level', 'debug']

    start = time.time()
    _run_go_cmd_and_wait(cmd, tmp_dir)

    # The digest file holds "<hash>/<size in bytes>".
    with open(digest_path) as digest_file:
      digest_hash, digest_size = digest_file.read().split('/')
    cas_output_root = {
        'cas_instance': cas_instance,
        'digest': {
            'hash': digest_hash,
            'size_bytes': int(digest_size),
        },
    }

    with open(stats_json_path) as stats_file:
      stats = json.load(stats_file)
    stats['duration'] = time.time() - start

    return cas_output_root, stats
  finally:
    fs.remove(digest_path)
    fs.remove(stats_json_path)
815
816
def map_and_run(data, constant_run_path):
  """Runs a command with optional isolated input/output.

  Sets up the run, output, temporary and cas client directories under
  data.root_dir, installs packages, optionally fetches the CAS input tree, runs
  the command, uploads the outputs and finally removes the directories.

  Arguments:
  - data: TaskData instance.
  - constant_run_path: when true (and data.root_dir is set), the run directory
                       is the fixed path <root_dir>/ISOLATED_RUN_DIR, which is
                       wiped and recreated; otherwise a fresh directory is
                       obtained from make_temp_dir().

  Returns metadata about the result, as a dict. 'internal_failure' is None only
  when the whole sequence up to and including the upload completed.
  """

  # TODO(tikuta): take stats from state.json in this case too.
  download_stats = {
      # 'duration': 0.,
      # 'initial_number_items': len(data.cas_cache),
      # 'initial_size': data.cas_cache.total_size,
      # 'items_cold': '<large.pack()>',
      # 'items_hot': '<large.pack()>',
  }

  result = {
      'duration': None,
      'exit_code': None,
      'had_hard_timeout': False,
      'internal_failure': 'run_isolated did not complete properly',
      'stats': {
          'trim_caches': {
              'duration': 0,
          },
          #'cipd': {
          #  'duration': 0.,
          #  'get_client_duration': 0.,
          #},
          'isolated': {
              'download': download_stats,
              #'upload': {
              #  'duration': 0.,
              #  'items_cold': '<large.pack()>',
              #  'items_hot': '<large.pack()>',
              #},
          },
          'named_caches': {
              'install': {
                  'duration': 0,
              },
              'uninstall': {
                  'duration': 0,
              },
          },
          'cleanup': {
              'duration': 0,
          }
      },
      #'cipd_pins': {
      #  'packages': [
      #    {'package_name': ..., 'version': ..., 'path': ...},
      #    ...
      #  ],
      #  'client_package': {'package_name': ..., 'version': ...},
      #},
      'outputs_ref': None,
      'cas_output_root': None,
      'version': 5,
  }

  assert os.path.isabs(data.root_dir), ("data.root_dir is not abs path: %s" %
                                        data.root_dir)
  file_path.ensure_tree(data.root_dir, 0o700)

  # See comment for these constants.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and data.root_dir:
    run_dir = os.path.join(data.root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir, 0o700)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, data.root_dir)

  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, data.root_dir)
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, data.root_dir)
  cwd = run_dir
  if data.relative_cwd:
    cwd = os.path.normpath(os.path.join(cwd, data.relative_cwd))
  command = data.command

  cas_client_dir = make_temp_dir(_CAS_CLIENT_DIR, data.root_dir)
  cas_client = os.path.join(cas_client_dir, 'cas' + cipd.EXECUTABLE_SUFFIX)

  data.trim_caches_fn(result['stats']['trim_caches'])

  nsjail_dir = None
  if (sys.platform == "linux" and cipd.get_platform() == "amd64" and
      data.containment.containment_type == subprocess42.Containment.NSJAIL):
    nsjail_dir = make_temp_dir(_NSJAIL_DIR, data.root_dir)

  try:
    with data.install_packages_fn(run_dir, cas_client_dir,
                                  nsjail_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      isolated_stats = result['stats'].setdefault('isolated', {})

      if data.cas_digest:
        stats = _fetch_and_map(
            cas_client=cas_client,
            digest=data.cas_digest,
            instance=data.cas_instance,
            output_dir=run_dir,
            cache_dir=data.cas_cache_dir,
            policies=data.cas_cache_policies,
            kvs_dir=data.cas_kvs,
            tmp_dir=tmp_dir)
        isolated_stats['download'].update(stats)

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        return result

      # A bare prefix comparison would accept a sibling such as
      # '<root>/ir2' for a run directory '<root>/ir', so require either the
      # run directory itself or a path below it (prefix ending with a
      # separator).
      if cwd != run_dir and not cwd.startswith(os.path.join(run_dir, '')):
        # Handle this as a task failure, not an internal failure. This is a
        # 'last chance' way to gate against directory escape.
        sys.stderr.write('<Relative CWD is outside of run directory!>\n')
        result['exit_code'] = 1
        return result

      if not os.path.isdir(cwd):
        # Accepts relative_cwd that does not exist.
        os.makedirs(cwd, 0o700)

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if data.outputs:
        file_path.create_directories(run_dir, data.outputs)

      with data.install_named_caches(run_dir, result['stats']['named_caches']):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(data.switch_to_account, tmp_dir):
            env = get_command_env(
                tmp_dir, cipd_info, run_dir, data.env, data.env_prefix, out_dir,
                data.bot_file)
            command = tools.find_executable(command, env)
            command = process_command(command, out_dir, data.bot_file)
            file_path.ensure_command_has_abs_path(command, cwd)

            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, env, data.hard_timeout, data.grace_period,
                data.lower_priority, data.containment)
        finally:
          result['duration'] = max(time.time() - start, 0)

      # Try to link files to the output directory, if specified.
      link_outputs_to_outdir(run_dir, out_dir, data.outputs)
      isolated_stats = result['stats'].setdefault('isolated', {})
      result['cas_output_root'], upload_stats = upload_outdir(
          cas_client, data.cas_instance, out_dir, tmp_dir)
      if upload_stats:
        isolated_stats['upload'] = upload_stats

    # We successfully ran the command, set internal_failure back to
    # None (even if the command failed, it's not an internal error).
    result['internal_failure'] = None
  except NonRetriableCasException as e:
    # We could not find the CAS package. The swarming task should not
    # be retried automatically
    result['missing_cas'] = e.to_dict()
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  except NonRetriableCipdException as e:
    # We could not find the CIPD package. The swarming task should not
    # be retried automatically
    result['missing_cipd'] = [e.to_dict()]
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  # Clean up
  finally:
    try:
      cleanup_start = time.time()
      success = True
      if data.leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks *.exe file). run_dir is therefore
        # removed first and out_dir last (since child processes may write to
        # out_dir too and we need to wait for them to finish).
        dirs_to_remove = [run_dir, tmp_dir, cas_client_dir]
        if nsjail_dir:
          # Created above by this function, so it is ours to remove as well.
          dirs_to_remove.append(nsjail_dir)
        if out_dir:
          dirs_to_remove.append(out_dir)
        for directory in dirs_to_remove:
          if not fs.isdir(directory):
            continue
          rm_start = time.time()
          removed = True
          try:
            file_path.rmtree(directory)
          except OSError as e:
            logging.error('rmtree(%r) failed: %s', directory, e)
            removed = False
            success = False
          finally:
            logging.info('Cleanup: rmtree(%r) took %d seconds', directory,
                         time.time() - rm_start)
          # Only complain about the directory that actually failed; an earlier
          # failure must not trigger the message for later directories.
          if not removed:
            sys.stderr.write(
                OUTLIVING_ZOMBIE_MSG % (directory, data.grace_period))
            if sys.platform == 'win32':
              subprocess42.check_call(['tasklist.exe', '/V'], stdout=sys.stderr)
            else:
              subprocess42.check_call(['ps', 'axu'], stdout=sys.stderr)
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
      on_error.report(None)
    finally:
      cleanup_duration = time.time() - cleanup_start
      result['stats']['cleanup']['duration'] = cleanup_duration
      logging.info('Cleanup: removing directories took %d seconds',
                   cleanup_duration)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -05001067
1068
def run_tha_test(data, result_json):
  """Runs the task through map_and_run() and reports its outcome.

  Arguments:
  - data: TaskData instance.
  - result_json: File path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.

  When result_json is not set and the result carries an 'outputs_ref', the
  reference is printed to stdout between [run_isolated_out_hack] markers.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Write a json output file right away in case we get killed.
    placeholder = {
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': 'Was terminated before completion',
        'outputs_ref': None,
        'cas_output_root': None,
        'version': 5,
    }
    tools.write_json(result_json, placeholder, dense=True)

  # run_isolated exit code. Depends on if result_json is used or not.
  result = map_and_run(data, True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  if not result_json:
    # Marshall into old-style inline output.
    outputs_ref = result['outputs_ref']
    if outputs_ref:
      # pylint: disable=unsubscriptable-object
      inline_output = {
          'hash': outputs_ref['isolated'],
          'namespace': outputs_ref['namespace'],
          'storage': outputs_ref['isolatedserver'],
      }
      sys.stdout.flush()
      print('[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
            tools.format_json(inline_output, dense=True))
      sys.stdout.flush()
    return result['exit_code'] or int(bool(result['internal_failure']))

  # We've found tests to delete 'work' when quitting, causing an exception
  # here. Try to recreate the directory if necessary.
  file_path.ensure_tree(os.path.dirname(result_json))
  tools.write_json(result_json, result, dense=True)
  # Only return 1 if there was an internal error.
  return int(bool(result['internal_failure']))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001125
1126
# Yielded by 'install_client_and_packages'.
CipdInfo = collections.namedtuple(
    'CipdInfo',
    (
        # cipd.CipdClient object
        'client',
        # absolute path to bot-global cipd tag and instance cache
        'cache_dir',
        # dict with stats to return to the server
        'stats',
        # dict with installed cipd pins to return to the server
        'pins',
    ))
1134
1135
@contextlib.contextmanager
def copy_local_packages(_run_dir, cas_dir, _nsjail_dir):
  """Copies the cas client from a local luci-go checkout instead of CIPD.

  The source directory is read from the LUCI_GO_CLIENT_DIR environment
  variable, which must be set. The cas executable found there is copied into
  |cas_dir|. Yields None, as no CIPD information is available.
  """
  go_client_dir = os.environ.get('LUCI_GO_CLIENT_DIR')
  assert go_client_dir, ('Please set LUCI_GO_CLIENT_DIR env var to install CIPD'
                         ' packages locally.')
  cas_binary = 'cas' + cipd.EXECUTABLE_SUFFIX
  source = os.path.join(go_client_dir, cas_binary)
  destination = os.path.join(cas_dir, cas_binary)
  shutil.copy2(source, destination)
  yield None
1145
1146
Takuto Ikuta2efc7792019-11-27 14:33:34 +00001147def _install_packages(run_dir, cipd_cache_dir, client, packages):
iannuccib58d10d2017-03-18 02:00:25 -07001148 """Calls 'cipd ensure' for packages.
1149
1150 Args:
1151 run_dir (str): root of installation.
1152 cipd_cache_dir (str): the directory to use for the cipd package cache.
1153 client (CipdClient): the cipd client to use
1154 packages: packages to install, list [(path, package_name, version), ...].
iannuccib58d10d2017-03-18 02:00:25 -07001155
1156 Returns: list of pinned packages. Looks like [
1157 {
1158 'path': 'subdirectory',
1159 'package_name': 'resolved/package/name',
1160 'version': 'deadbeef...',
1161 },
1162 ...
1163 ]
1164 """
1165 package_pins = [None]*len(packages)
1166 def insert_pin(path, name, version, idx):
1167 package_pins[idx] = {
1168 'package_name': name,
1169 # swarming deals with 'root' as '.'
1170 'path': path or '.',
1171 'version': version,
1172 }
1173
1174 by_path = collections.defaultdict(list)
1175 for i, (path, name, version) in enumerate(packages):
1176 # cipd deals with 'root' as ''
1177 if path == '.':
1178 path = ''
1179 by_path[path].append((name, version, i))
1180
1181 pins = client.ensure(
Takuto Ikuta2efc7792019-11-27 14:33:34 +00001182 run_dir,
1183 {
1184 subdir: [(name, vers) for name, vers, _ in pkgs
1185 ] for subdir, pkgs in by_path.items()
1186 },
1187 cache_dir=cipd_cache_dir,
iannuccib58d10d2017-03-18 02:00:25 -07001188 )
1189
Marc-Antoine Ruel04903a32019-10-09 21:09:25 +00001190 for subdir, pin_list in sorted(pins.items()):
iannuccib58d10d2017-03-18 02:00:25 -07001191 this_subdir = by_path[subdir]
1192 for i, (name, version) in enumerate(pin_list):
1193 insert_pin(subdir, name, version, this_subdir[i][2])
1194
Robert Iannucci461b30d2017-12-13 11:34:03 -08001195 assert None not in package_pins, (packages, pins, package_pins)
iannuccib58d10d2017-03-18 02:00:25 -07001196
1197 return package_pins
1198
1199
@contextlib.contextmanager
def install_client_and_packages(run_dir, packages, service_url,
                                client_package_name, client_version, cache_dir,
                                cas_dir, nsjail_dir):
  """Bootstraps CIPD client and installs CIPD packages.

  Yields CipdClient, stats, client info and pins (as single CipdInfo object).

  Pins and the CIPD client info are in the form of:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  (the CIPD client info is a single dictionary instead of a list)

  such that they correspond 1:1 to all input package arguments from the command
  line. These dictionaries make their way all the way back to swarming, where
  they become the arguments of CipdPackage.

  If 'packages' list is empty, will bootstrap CIPD client, but won't install
  any packages.

  Regardless of whether 'packages' is empty, the cas client is always installed
  into |cas_dir|, and nsjail is installed into |nsjail_dir| when it is not
  None.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
      "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    cas_dir (str): where to download cas client.
    nsjail_dir (str): where to download nsjail. If set to None, nsjail is not
      downloaded.
  """
  assert cache_dir

  started_at = time.time()

  cache_dir = os.path.abspath(cache_dir)
  # Tag and instance caches live in a subdirectory of the client cache.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')
  run_dir = os.path.abspath(run_dir)
  requested = packages or []

  client_started_at = time.time()
  client_manager = cipd.get_client(cache_dir, service_url, client_package_name,
                                   client_version)

  with client_manager as client:
    get_client_duration = time.time() - client_started_at

    package_pins = (
        _install_packages(run_dir, cipd_cache_dir, client, requested)
        if requested else [])

    # Install cas client to |cas_dir|.
    cas_packages = [('', _CAS_PACKAGE, _LUCI_GO_REVISION)]
    _install_packages(cas_dir, cipd_cache_dir, client, cas_packages)

    # Install nsjail to |nsjail_dir|.
    if nsjail_dir is not None:
      nsjail_packages = [('', _NSJAIL_PACKAGE, _NSJAIL_VERSION)]
      _install_packages(nsjail_dir, cipd_cache_dir, client, nsjail_packages)

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - started_at
    logging.info('Installing CIPD client and packages took %d seconds',
                 total_duration)

    stats = {
        'duration': total_duration,
        'get_client_duration': get_client_duration,
    }
    pins = {
        'client_package': {
            'package_name': client.package_name,
            'version': client.instance_id,
        },
        'packages': package_pins,
    }
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -07001289
1290
def create_option_parser():
  """Builds the command line parser used by run_isolated.

  The parser is a logging_utils.OptionParserWithLogging to which the CAS cache,
  CIPD and auth option sets are added through their respective helpers, in
  addition to the options declared inline here.

  Returns:
    The fully configured parser. --argsfile is declared only so that it shows up
    in the help text; it is actually consumed by parse_args().
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)
  parser.add_option(
      '--clean',
      action='store_true',
      help='Cleans the cache, trimming if necessary and remove corrupted items '
      'and returns without executing anything; use with -v to know what '
      'was done')
  parser.add_option(
      '--json',
      help='dump output metadata to json file. When used, run_isolated returns '
      'non-zero only on internal failure')
  parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  parser.add_option(
      '--grace-period',
      type='float',
      help='Grace period between SIGTERM and SIGKILL')
  parser.add_option(
      '--relative-cwd',
      help='Ignore the isolated \'relative_cwd\' and use this one instead')
  parser.add_option(
      '--env',
      default=[],
      action='append',
      help='Environment variables to set for the child process')
  parser.add_option(
      '--env-prefix',
      default=[],
      action='append',
      help='Specify a VAR=./path/fragment to put in the environment variable '
      'before executing the command. The path fragment must be relative '
      'to the isolated run directory, and must not contain a `..` token. '
      'The path will be made absolute and prepended to the indicated '
      '$VAR using the OS\'s path separator. Multiple items for the same '
      '$VAR will be prepended in order.')
  parser.add_option(
      '--bot-file',
      help='Path to a file describing the state of the host. The content is '
      'defined by on_before_task() in bot_config.')
  parser.add_option(
      '--switch-to-account',
      help='If given, switches LUCI_CONTEXT to given logical service account '
      '(e.g. "task" or "system") before launching the isolated process.')
  parser.add_option(
      '--output',
      action='append',
      help='Specifies an output to return. If no outputs are specified, all '
      'files located in ${ISOLATED_OUTDIR} will be returned; '
      'otherwise, outputs in both ${ISOLATED_OUTDIR} and those '
      'specified by --output option (there can be multiple) will be '
      'returned. Note that if a file in ${ISOLATED_OUTDIR} has the same '
      'path as an --output option, the --output version will be returned.')
  parser.add_option(
      '-a',
      '--argsfile',
      # This is actually handled in parse_args; it's included here purely so it
      # can make it into the help text.
      help='Specify a file containing a JSON array of arguments to this '
      'script. If --argsfile is provided, no other argument may be '
      'provided on the command line.')
  parser.add_option(
      '--report-on-exception',
      action='store_true',
      help='Whether report exception during execution to isolate server. '
      'This flag should only be used in swarming bot.')

  group = optparse.OptionGroup(parser,
                               'Data source - Content Addressed Storage')
  group.add_option(
      '--cas-instance', help='Full CAS instance name for input/output files.')
  group.add_option(
      '--cas-digest',
      help='Digest of the input root on RBE-CAS. The format is '
      '`{hash}/{size_bytes}`.')
  parser.add_option_group(group)

  # Cache options.
  add_cas_cache_options(parser)

  cipd.add_cipd_options(parser)

  group = optparse.OptionGroup(parser, 'Named caches')
  group.add_option(
      '--named-cache',
      dest='named_caches',
      action='append',
      nargs=3,
      default=[],
      help='A named cache to request. Accepts 3 arguments: name, path, hint. '
      'name identifies the cache, must match regex [a-z0-9_]{1,4096}. '
      'path is a path relative to the run dir where the cache directory '
      'must be put to. '
      'This option can be specified more than once.')
  group.add_option(
      '--named-cache-root',
      default='named_caches',
      help='Cache root directory. Default=%default')
  parser.add_option_group(group)

  # The containment options are registered on the group (not directly on the
  # parser) so that they are listed under the 'Process containment' heading in
  # --help instead of leaving that group empty.
  group = optparse.OptionGroup(parser, 'Process containment')
  group.add_option(
      '--lower-priority', action='store_true',
      help='Lowers the child process priority')
  group.add_option(
      '--containment-type',
      choices=('NONE', 'AUTO', 'JOB_OBJECT', 'NSJAIL'),
      default='NONE',
      help='Type of container to use')
  group.add_option(
      '--limit-processes',
      type='int',
      default=0,
      help='Maximum number of active processes in the containment')
  group.add_option(
      '--limit-total-committed-memory',
      type='int',
      default=0,
      help='Maximum sum of committed memory in the containment')
  parser.add_option_group(group)

  group = optparse.OptionGroup(parser, 'Debugging')
  group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
      'Default: %default')
  group.add_option('--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(group)

  auth.add_auth_options(parser)

  parser.set_defaults(cache='cache')
  return parser
1428
1429
def add_cas_cache_options(parser):
  """Registers the 'CAS cache management' option group on |parser|.

  Declares --cas-cache, --kvs-dir, --max-cache-size and --min-free-space with
  their defaults (50GiB maximum cache size, 2GiB minimum free space).
  """
  gib = 1024 * 1024 * 1024
  cas_group = optparse.OptionGroup(parser, 'CAS cache management')

  cas_cache_help = (
      'Directory to keep a local cache of the files. Accelerates download '
      'by reusing already downloaded files. Default=%default')
  cas_group.add_option(
      '--cas-cache', metavar='DIR', default='cas-cache', help=cas_cache_help)

  kvs_help = 'CAS cache dir using kvs for small files. Default=%default'
  cas_group.add_option('--kvs-dir', default='', help=kvs_help)

  max_size_help = (
      'Trim if the cache gets larger than this value, default=%default')
  cas_group.add_option(
      '--max-cache-size',
      type='int',
      metavar='NNN',
      default=50 * gib,
      help=max_size_help)

  min_free_help = ('Trim if disk free space becomes lower than this value, '
                   'default=%default')
  cas_group.add_option(
      '--min-free-space',
      type='int',
      metavar='NNN',
      default=2 * gib,
      help=min_free_help)

  parser.add_option_group(cas_group)
1456
1457
def process_cas_cache_options(options):
  """Returns the CAS cache object described by the parsed |options|.

  When --cas-cache is empty, a local_caching.MemoryContentAddressedCache is
  returned. Otherwise a local_caching.DiskContentAddressedCache rooted at the
  absolute --cas-cache path is created with trim=False.
  """
  if not options.cas_cache:
    return local_caching.MemoryContentAddressedCache()

  cache_policies = local_caching.CachePolicies(
      max_cache_size=options.max_cache_size,
      min_free_space=options.min_free_space,
      # max_items isn't used for CAS cache for now.
      max_items=None,
      max_age_secs=MAX_AGE_SECS)
  cache_dir = os.path.abspath(options.cas_cache)
  return local_caching.DiskContentAddressedCache(
      cache_dir, cache_policies, trim=False)
1472
1473
def process_named_cache_options(parser, options, time_fn=None):
  """Validates the named cache options and builds the cache object.

  Every (name, path, hint) triple of options.named_caches is checked: the name
  must match CACHE_NAME_RE, the path must be non-empty and the hint must be
  convertible with int(). Violations are reported through parser.error().

  Returns:
    A local_caching.NamedCache rooted at the absolute --named-cache-root, with
    the requested cache names already touched, or None when --named-cache-root
    is empty.
  """
  requested = options.named_caches
  if requested and not options.named_cache_root:
    parser.error('--named-cache is specified, but --named-cache-root is empty')

  for cache_name, cache_path, cache_hint in requested:
    if CACHE_NAME_RE.match(cache_name) is None:
      parser.error(
          'cache name %r does not match %r' %
          (cache_name, CACHE_NAME_RE.pattern))
    if not cache_path:
      parser.error('cache path cannot be empty')
    try:
      int(cache_hint)
    except ValueError:
      parser.error('cache hint must be a number')

  if not options.named_cache_root:
    return None

  # Make these configurable later if there is use case but for now it's fairly
  # safe values.
  # In practice, a fair chunk of bots are already recycled on a daily schedule
  # so this code doesn't have any effect to them, unless they are preloaded
  # with a really old cache.
  one_tib = 1024 * 1024 * 1024 * 1024
  policies = local_caching.CachePolicies(
      max_cache_size=one_tib,
      min_free_space=options.min_free_space,
      max_items=50,
      max_age_secs=MAX_AGE_SECS)
  cache_root = os.path.abspath(options.named_cache_root)
  named = local_caching.NamedCache(cache_root, policies, time_fn=time_fn)
  # Touch any named caches we're going to use to minimize thrashing
  # between tasks that request some (but not all) of the same named caches.
  wanted_names = [entry[0] for entry in options.named_caches]
  named.touch(*wanted_names)
  return named
1507
1508
def parse_args(args):
  """Parses the command line, expanding --argsfile when it is given.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    A (parser, options, args) tuple where |parser| is the parser built by
    create_option_parser(), |options| the parsed options and |args| the
    remaining positional arguments.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser
  # even though that parser will never actually get to parse it. This is
  # because --argsfile is exclusive with all other options and arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      file_argparse.error('Can\'t specify --argsfile with '
                          'any other arguments (%s)' % nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        args = json.load(f)
    except (IOError, OSError, ValueError) as e:
      # We don't need to error out here - "args" is left untouched (it still
      # only holds the --argsfile flag), and the regular parser below is the
      # one in charge of reporting usage problems.
      print('Couldn\'t read arguments: %s' % e, file=sys.stderr)

  # Even if we failed to read the args, just call the normal parser now since it
  # will print the correct help message.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  # cipd_enabled may come in as text (e.g. 'true'/'false'); normalize it.
  if not isinstance(options.cipd_enabled, (bool, int)):
    options.cipd_enabled = distutils.util.strtobool(options.cipd_enabled)
  return (parser, options, args)
1537
1538
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001539def _calc_named_cache_hint(named_cache, named_caches):
1540 """Returns the expected size of the missing named caches."""
1541 present = named_cache.available
1542 size = 0
Takuto Ikutad169bfd2021-08-02 05:45:09 +00001543 logging.info('available named cache %s', present)
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001544 for name, _, hint in named_caches:
1545 if name not in present:
Takuto Ikuta630f99d2020-07-02 12:59:35 +00001546 hint = int(hint)
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001547 if hint > 0:
Takuto Ikuta74686842021-07-30 04:11:03 +00001548 logging.info("named cache hint: %s, %d", name, hint)
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001549 size += hint
Takuto Ikuta74686842021-07-30 04:11:03 +00001550 logging.info("total size of named cache hint: %d", size)
Marc-Antoine Ruelc7a704b2018-08-29 19:02:23 +00001551 return size
1552
1553
Takuto Ikutaae391c52020-12-03 08:43:45 +00001554def _clean_cmd(parser, options, caches, root):
Takuto Ikuta7ff4b242020-12-03 08:07:06 +00001555 """Cleanup cache dirs/files."""
Takuto Ikuta7ff4b242020-12-03 08:07:06 +00001556 if options.json:
1557 parser.error('Can\'t use --json with --clean.')
1558 if options.named_caches:
1559 parser.error('Can\t use --named-cache with --clean.')
1560 if options.cas_instance or options.cas_digest:
1561 parser.error('Can\t use --cas-instance, --cas-digest with --clean.')
1562
1563 logging.info("initial free space: %d", file_path.get_free_space(root))
1564
Junji Watanabe7a631b02022-01-13 02:30:29 +00001565 if options.kvs_dir and fs.isdir(options.kvs_dir):
Takuto Ikuta7ff4b242020-12-03 08:07:06 +00001566 # Remove kvs file if its size exceeds fixed threshold.
Junji Watanabe7a631b02022-01-13 02:30:29 +00001567 kvs_dir = options.kvs_dir
Takuto Ikutab1b70062021-03-22 01:02:41 +00001568 size = file_path.get_recursive_size(kvs_dir)
Takuto Ikuta91cb5ca2021-03-17 07:19:30 +00001569 if size >= _CAS_KVS_CACHE_THRESHOLD:
1570 logging.info("remove kvs dir with size: %d", size)
Takuto Ikutab1b70062021-03-22 01:02:41 +00001571 file_path.rmtree(kvs_dir)
Takuto Ikuta7ff4b242020-12-03 08:07:06 +00001572
1573 # Trim first, then clean.
1574 local_caching.trim_caches(
1575 caches,
1576 root,
1577 min_free_space=options.min_free_space,
1578 max_age_secs=MAX_AGE_SECS)
1579 logging.info("free space after trim: %d", file_path.get_free_space(root))
1580 for c in caches:
1581 c.cleanup()
1582 logging.info("free space after cleanup: %d", file_path.get_free_space(root))
1583
1584
def _is_path_within(path, root):
  """Returns True if |path| is |root| itself or is located below it.

  Both arguments are expected to be normalized absolute paths. Unlike a bare
  str.startswith() check, a sibling sharing a name prefix (e.g. '/b/work2' for
  root '/b/work') is not considered to be inside |root|.
  """
  if path == root:
    return True
  prefix = root if root.endswith(os.sep) else root + os.sep
  return path.startswith(prefix)


def main(args):
  """Entry point: parses |args|, prepares the caches and runs the command.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    0 right after the cleanup when --clean is used. Otherwise the value returned
    by run_tha_test(), or 1 when a cipd.Error, local_caching.NamedCacheError or
    local_caching.NoMoreSpace is caught.
  """
  # Warning: when --argsfile is used, the strings are unicode instances, when
  # parsed normally, the strings are str instances.
  (parser, options, args) = parse_args(args)

  # adds another log level for logs which are directed to standard output
  # these logs will be uploaded to cloudstorage
  logging_utils.set_user_level_logging()

  # Must be logged after parse_args(), which eventually calls
  # logging_utils.prepare_logging() which expects no logs before its call.
  logging_utils.user_logs('Starting run_isolated script')

  SWARMING_SERVER = os.environ.get('SWARMING_SERVER')
  SWARMING_TASK_ID = os.environ.get('SWARMING_TASK_ID')
  if options.report_on_exception and SWARMING_SERVER:
    task_url = None
    if SWARMING_TASK_ID:
      task_url = '%s/task?id=%s' % (SWARMING_SERVER, SWARMING_TASK_ID)
    on_error.report_on_exception_exit(SWARMING_SERVER, source=task_url)

  if not file_path.enable_symlink():
    logging.warning('Symlink support is not enabled')

  named_cache = process_named_cache_options(parser, options)
  # hint is 0 if there's no named cache.
  hint = _calc_named_cache_hint(named_cache, options.named_caches)
  if hint:
    # Increase the --min-free-space value by the hint, and recreate the
    # NamedCache instance so it gets the updated CachePolicy.
    options.min_free_space += hint
    named_cache = process_named_cache_options(parser, options)

  # TODO(maruel): CIPD caches should be defined at an higher level here too, so
  # they can be cleaned the same way.

  cas_cache = process_cas_cache_options(options)

  caches = []
  if cas_cache:
    caches.append(cas_cache)
  if named_cache:
    caches.append(named_cache)
  root = caches[0].cache_dir if caches else os.getcwd()
  if options.clean:
    _clean_cmd(parser, options, caches, root)
    return 0

  # Trim must still be done for the following case:
  # - named-cache was used
  # - some entries, with a large hint, where missing
  # - --min-free-space was increased accordingly, thus trimming is needed
  # Otherwise, this will have no effect, as bot_main calls run_isolated with
  # --clean after each task.
  additional_buffer = _FREE_SPACE_BUFFER_FOR_CIPD_PACKAGES
  if options.kvs_dir:
    additional_buffer += _CAS_KVS_CACHE_THRESHOLD
  # Add some buffer for Go CLI.
  min_free_space = options.min_free_space + additional_buffer

  def trim_caches_fn(stats):
    # Trims |caches| and records how long it took in stats['duration'].
    start = time.time()
    local_caching.trim_caches(
        caches, root, min_free_space=min_free_space, max_age_secs=MAX_AGE_SECS)
    duration = time.time() - start
    stats['duration'] = duration
    logging_utils.user_logs('trim_caches: took %d seconds', duration)

  # Save state of cas cache not to overwrite state from go client.
  if cas_cache:
    cas_cache.save()
    cas_cache = None

  if not args:
    parser.error('command to run is required.')

  auth.process_auth_options(parser, options)

  if ISOLATED_OUTDIR_PARAMETER in args and not options.cas_instance:
    parser.error('%s in args requires --cas-instance' %
                 ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = os.path.abspath(options.root_dir)
  else:
    options.root_dir = tempfile.mkdtemp(prefix='root')
  if options.json:
    options.json = os.path.abspath(options.json)

  if any('=' not in i for i in options.env):
    parser.error(
        '--env required key=value form. value can be skipped to delete '
        'the variable')
  options.env = dict(i.split('=', 1) for i in options.env)

  # Group the --env-prefix fragments per variable, preserving their order.
  prefixes = {}
  cwd = os.path.realpath(os.getcwd())
  for item in options.env_prefix:
    if '=' not in item:
      parser.error(
          '--env-prefix %r is malformed, must be in the form `VAR=./path`'
          % item)
    key, opath = item.split('=', 1)
    if os.path.isabs(opath):
      parser.error('--env-prefix %r path is bad, must be relative.' % opath)
    opath = os.path.normpath(opath)
    # Compare on a path component boundary so that a sibling directory sharing
    # a name prefix with |cwd| is rejected too.
    if not _is_path_within(os.path.realpath(os.path.join(cwd, opath)), cwd):
      parser.error(
          '--env-prefix %r path is bad, must be relative and not contain `..`.'
          % opath)
    prefixes.setdefault(key, []).append(opath)
  options.env_prefix = prefixes

  cipd.validate_cipd_options(parser, options)

  install_packages_fn = copy_local_packages
  tmp_cipd_cache_dir = None
  if options.cipd_enabled:
    cache_dir = options.cipd_cache
    if not cache_dir:
      # No cache directory given: use a throwaway one, removed on exit below.
      tmp_cipd_cache_dir = tempfile.mkdtemp()
      cache_dir = tmp_cipd_cache_dir
    install_packages_fn = (
        lambda run_dir, cas_dir, nsjail_dir: install_client_and_packages(
            run_dir,
            cipd.parse_package_args(options.cipd_packages),
            options.cipd_server,
            options.cipd_client_package,
            options.cipd_client_version,
            cache_dir=cache_dir,
            cas_dir=cas_dir,
            nsjail_dir=nsjail_dir,
        ))

  @contextlib.contextmanager
  def install_named_caches(run_dir, stats):
    # WARNING: this function depends on "options" variable defined in the outer
    # function.
    assert str(run_dir), repr(run_dir)
    assert os.path.isabs(run_dir), run_dir
    named_caches = [(os.path.join(run_dir, str(relpath)), name)
                    for name, relpath, _ in options.named_caches]
    install_start = time.time()
    for path, name in named_caches:
      named_cache.install(path, name)
    install_duration = time.time() - install_start
    stats['install']['duration'] = install_duration
    logging.info('named_caches: install took %d seconds', install_duration)
    try:
      yield
    finally:
      # Uninstall each named cache, returning it to the cache pool. If an
      # uninstall fails for a given cache, it will remain in the task's
      # temporary space, get cleaned up by the Swarming bot, and be lost.
      #
      # If the Swarming bot cannot clean up the cache, it will handle it like
      # any other bot file that could not be removed.
      uninstall_start = time.time()
      for path, name in reversed(named_caches):
        try:
          # uninstall() doesn't trim but does call save() implicitly. Trimming
          # *must* be done manually via periodic 'run_isolated.py --clean'.
          named_cache.uninstall(path, name)
        except local_caching.NamedCacheError:
          if sys.platform == 'win32':
            # Show running processes.
            sys.stderr.write("running process\n")
            subprocess42.check_call(['tasklist.exe', '/V'], stdout=sys.stderr)

          # Report the cache name first and its location second, matching the
          # wording of the message.
          error = (
              'Error while removing named cache %r at %r. The cache will be'
              ' lost.' % (name, path))
          logging.exception(error)
          on_error.report(error)
      uninstall_duration = time.time() - uninstall_start
      stats['uninstall']['duration'] = uninstall_duration
      logging.info('named_caches: uninstall took %d seconds',
                   uninstall_duration)

  command = args
  if options.relative_cwd:
    a = os.path.normpath(os.path.abspath(options.relative_cwd))
    if not _is_path_within(a, os.getcwd()):
      parser.error(
          '--relative-cwd must not try to escape the working directory')

  containment_type = subprocess42.Containment.NONE
  if options.containment_type == 'AUTO':
    containment_type = subprocess42.Containment.AUTO
  if options.containment_type == 'JOB_OBJECT':
    containment_type = subprocess42.Containment.JOB_OBJECT
  if options.containment_type == 'NSJAIL':
    containment_type = subprocess42.Containment.NSJAIL
  # TODO(https://crbug.com/1227833): This object should eventually contain the
  # path to the nsjail binary and the nsjail configuration file.
  containment = subprocess42.Containment(
      containment_type=containment_type,
      limit_processes=options.limit_processes,
      limit_total_committed_memory=options.limit_total_committed_memory)

  data = TaskData(command=command,
                  relative_cwd=options.relative_cwd,
                  cas_instance=options.cas_instance,
                  cas_digest=options.cas_digest,
                  outputs=options.output,
                  install_named_caches=install_named_caches,
                  leak_temp_dir=options.leak_temp_dir,
                  root_dir=options.root_dir,
                  hard_timeout=options.hard_timeout,
                  grace_period=options.grace_period,
                  bot_file=options.bot_file,
                  switch_to_account=options.switch_to_account,
                  install_packages_fn=install_packages_fn,
                  cas_cache_dir=options.cas_cache,
                  cas_cache_policies=local_caching.CachePolicies(
                      max_cache_size=options.max_cache_size,
                      min_free_space=options.min_free_space,
                      max_items=None,
                      max_age_secs=None,
                  ),
                  cas_kvs=options.kvs_dir,
                  env=options.env,
                  env_prefix=options.env_prefix,
                  lower_priority=bool(options.lower_priority),
                  containment=containment,
                  trim_caches_fn=trim_caches_fn)
  try:
    return run_tha_test(data, options.json)
  except (cipd.Error, local_caching.NamedCacheError,
          local_caching.NoMoreSpace) as ex:
    # Exceptions have no implicit |message| attribute on Python 3; fall back to
    # str(ex) so that reporting the failure cannot itself raise.
    print(getattr(ex, 'message', str(ex)), file=sys.stderr)
    on_error.report(None)
    return 1
  finally:
    if tmp_cipd_cache_dir is not None:
      try:
        file_path.rmtree(tmp_cipd_cache_dir)
      except OSError:
        logging.exception('Remove tmp_cipd_cache_dir=%s failed',
                          tmp_cipd_cache_dir)
        # Best effort clean up. Failed to do so doesn't affect the outcome.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001826
1827
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  # Identify this script and its version in the user agent passed to |net|.
  net.set_user_agent('run_isolated.py/' + __version__)
  # The process exit code is whatever main() returns.
  sys.exit(main(sys.argv[1:]))