blob: 8bbb6d233380d96d1fe035ca500b7e65c96fa877 [file] [log] [blame]
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001#!/usr/bin/env python
maruelea586f32016-04-05 11:11:33 -07002# Copyright 2012 The LUCI Authors. All rights reserved.
maruelf1f5e2a2016-05-25 17:10:39 -07003# Use of this source code is governed under the Apache License, Version 2.0
4# that can be found in the LICENSE file.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00005
nodir55be77b2016-05-03 09:39:57 -07006"""Runs a command with optional isolated input/output.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00007
nodir55be77b2016-05-03 09:39:57 -07008Despite name "run_isolated", can run a generic non-isolated command specified as
9args.
10
11If input isolated hash is provided, fetches it, creates a tree of hard links,
12appends args to the command in the fetched isolated and runs it.
13To improve performance, keeps a local cache.
14The local cache can safely be deleted.
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050015
nodirbe642ff2016-06-09 15:51:51 -070016Any ${EXECUTABLE_SUFFIX} on the command line will be replaced with ".exe" string
17on Windows and "" on other platforms.
18
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -050019Any ${ISOLATED_OUTDIR} on the command line will be replaced by the location of a
20temporary directory upon execution of the command specified in the .isolated
21file. All content written to this directory will be uploaded upon termination
22and the .isolated file describing this directory will be printed to stdout.
bpastene447c1992016-06-20 15:21:47 -070023
24Any ${SWARMING_BOT_FILE} on the command line will be replaced by the value of
25the --bot-file parameter. This file is used by a swarming bot to communicate
26state of the host to tasks. It is written to by the swarming bot's
27on_before_task() hook in the swarming server's custom bot_config.py.
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000028"""
29
maruelcffa0542017-04-07 08:39:20 -070030__version__ = '0.9.1'
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000031
aludwin7556e0c2016-10-26 08:46:10 -070032import argparse
maruel064c0a32016-04-05 11:47:15 -070033import base64
iannucci96fcccc2016-08-30 15:52:22 -070034import collections
vadimsh232f5a82017-01-20 19:23:44 -080035import contextlib
aludwin7556e0c2016-10-26 08:46:10 -070036import json
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000037import logging
38import optparse
39import os
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000040import sys
41import tempfile
maruel064c0a32016-04-05 11:47:15 -070042import time
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +000043
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000044from third_party.depot_tools import fix_encoding
45
Vadim Shtayura6b555c12014-07-23 16:22:18 -070046from utils import file_path
maruel12e30012015-10-09 11:55:35 -070047from utils import fs
maruel064c0a32016-04-05 11:47:15 -070048from utils import large
Marc-Antoine Ruelf74cffe2015-07-15 15:21:34 -040049from utils import logging_utils
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040050from utils import on_error
Marc-Antoine Ruelc44f5722015-01-08 16:10:01 -050051from utils import subprocess42
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000052from utils import tools
vadimsh@chromium.org3e97deb2013-08-24 00:56:44 +000053from utils import zip_package
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000054
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080055import auth
nodirbe642ff2016-06-09 15:51:51 -070056import cipd
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000057import isolateserver
nodirf33b8d62016-10-26 22:34:58 -070058import named_cache
maruel@chromium.orgdedbf492013-09-12 20:42:11 +000059
vadimsh@chromium.orga4326472013-08-24 02:05:41 +000060
# Absolute path to this file (can be None if running from zip on Mac).
# A byte string __file__ is decoded with the filesystem encoding so that the
# resulting path is text; a __file__ that is already text is used as is.
if not __file__:
  THIS_FILE_PATH = None
elif isinstance(__file__, bytes):
  THIS_FILE_PATH = os.path.abspath(
      __file__.decode(sys.getfilesystemencoding()))
else:
  THIS_FILE_PATH = os.path.abspath(__file__)

# Directory that contains this file (might be inside zip package). It is None
# exactly when THIS_FILE_PATH is None; testing THIS_FILE_PATH instead of
# calling __file__.decode() again avoids an AttributeError on an empty
# __file__.
BASE_DIR = os.path.dirname(THIS_FILE_PATH) if THIS_FILE_PATH else None
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000068
# Directory that contains the currently running script file. It is None when
# zip_package cannot report a main script path, which happens when
# 'import run_isolated' is executed at the python interactive prompt.
MAIN_DIR = (
    os.path.dirname(os.path.abspath(zip_package.get_main_script_path()))
    if zip_package.get_main_script_path() else None)
vadimsh@chromium.org8b9d56b2013-08-21 22:24:35 +000077
maruele2f2cb82016-07-13 14:41:03 -070078
# Magic variables that can be found in the isolate task command line. They are
# substituted by process_command() before the command is started.
ISOLATED_OUTDIR_PARAMETER = '${ISOLATED_OUTDIR}'
EXECUTABLE_SUFFIX_PARAMETER = '${EXECUTABLE_SUFFIX}'
SWARMING_BOT_FILE_PARAMETER = '${SWARMING_BOT_FILE}'


# The name of the log file to use.
RUN_ISOLATED_LOG_FILE = 'run_isolated.log'


# The name of the log to use for the run_test_cases.py command.
RUN_TEST_CASES_LOG = 'run_test_cases.log'


# Use short names for temporary directories. This is driven by Windows, which
# imposes a relatively short maximum path length of 260 characters, often
# referred to as MAX_PATH. It is relatively easy to create files with a longer
# path length. A use case is recursive dependency trees like npm packages.
#
# It is recommended to start the script with a `root_dir` as short as
# possible.
# - ir stands for isolated_run
# - io stands for isolated_out
# - it stands for isolated_tmp
ISOLATED_RUN_DIR = u'ir'
ISOLATED_OUT_DIR = u'io'
ISOLATED_TMP_DIR = u'it'
106
107
# Message written to stderr when a task directory could not be deleted. It
# takes two %-format arguments, in this order: a short label for the directory
# that could not be removed and the grace period in seconds.
OUTLIVING_ZOMBIE_MSG = """\
*** Swarming tried multiple times to delete the %s directory and failed ***
*** Hard failing the task ***

Swarming detected that your testing script ran an executable, which may have
started a child executable, and the main script returned early, leaving the
children executables playing around unguided.

You don't want to leave children processes outliving the task on the Swarming
bot, do you? The Swarming bot doesn't.

How to fix?
- For any process that starts children processes, make sure all children
  processes terminated properly before each parent process exits. This is
  especially important in very deep process trees.
  - This must be done properly both in normal successful task and in case of
    task failure. Cleanup is very important.
- The Swarming bot sends a SIGTERM in case of timeout.
  - You have %s seconds to comply after the signal was sent to the process
    before the process is forcibly killed.
- To achieve not leaking children processes in case of signals on timeout, you
  MUST handle signals in each executable / python script and propagate them to
  children processes.
  - When your test script (python or binary) receives a signal like SIGTERM or
    CTRL_BREAK_EVENT on Windows), send it to all children processes and wait for
    them to terminate before quitting.

See
https://github.com/luci/luci-py/blob/master/appengine/swarming/doc/Bot.md#graceful-termination-aka-the-sigterm-and-sigkill-dance
for more information.

*** May the SIGKILL force be with you ***
"""
141
142
def get_as_zip_package(executable=True):
  """Builds a ZipPackage holding this module and all its dependencies.

  If |executable| is True, run_isolated.py is stored as __main__.py so that
  the zip package is directly executable by python.
  """
  # Building a zip package when running from another zip package is
  # unsupported and probably unneeded.
  assert not zip_package.is_zipped_module(sys.modules[__name__])
  assert THIS_FILE_PATH
  assert BASE_DIR
  package = zip_package.ZipPackage(root=BASE_DIR)
  main_name = '__main__.py' if executable else None
  package.add_python_file(THIS_FILE_PATH, main_name)
  # Sibling modules, added in the same order as they always were.
  sibling_modules = (
      'isolate_storage.py',
      'isolated_format.py',
      'isolateserver.py',
      'auth.py',
      'cipd.py',
      'named_cache.py',
  )
  for module_name in sibling_modules:
    package.add_python_file(os.path.join(BASE_DIR, module_name))
  # Whole directories that are packaged as is.
  for dir_name in ('libs', 'third_party', 'utils'):
    package.add_directory(os.path.join(BASE_DIR, dir_name))
  return package
166
167
def make_temp_dir(prefix, root_dir):
  """Returns a new unique temporary directory as a text (unicode) path.

  Args:
    prefix: prefix of the name of the directory to create.
    root_dir: directory to create the new directory in; None lets tempfile
        choose its default location.
  """
  path = tempfile.mkdtemp(prefix=prefix, dir=root_dir)
  if isinstance(path, bytes):
    # Decode with the filesystem encoding rather than the default codec, so
    # that a temporary location containing non-ASCII characters does not raise
    # UnicodeDecodeError.
    path = path.decode(sys.getfilesystemencoding())
  return path
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000171
172
def change_tree_read_only(rootdir, read_only):
  """Applies the read-only policy |read_only| to the tree at |rootdir|.

  |read_only| is one of 0 (or None), 1 or 2. It controls whether files can be
  modified and whether files can be created or deleted.

  Raises:
    ValueError if |read_only| is not one of the known flags.
  """
  if read_only == 2:
    # Files and directories (except on Windows) are marked read only. This
    # inhibits modifying, creating or deleting files in the test directory,
    # except on Windows where creating and deleting files is still possible.
    file_path.make_tree_read_only(rootdir)
    return
  if read_only == 1:
    # Files are marked read only but not the directories. This inhibits
    # modifying files but creating or deleting files is still possible.
    file_path.make_tree_files_read_only(rootdir)
    return
  if read_only in (0, None):
    # Anything can be modified.
    # TODO(maruel): This is currently dangerous as long as DiskCache.touch()
    # is not yet changed to verify the hash of the content of the files it is
    # looking at, so that if a test modifies an input file, the file must be
    # deleted.
    file_path.make_tree_writeable(rootdir)
    return
  raise ValueError(
      'change_tree_read_only(%s, %s): Unknown flag %s' %
      (rootdir, read_only, read_only))
199
200
def process_command(command, out_dir, bot_file):
  """Substitutes the magic variables found in a command line.

  Args:
    command: list of arguments to process.
    out_dir: value substituted for ${ISOLATED_OUTDIR}.
    bot_file: value substituted for ${SWARMING_BOT_FILE}; when it is not set
        the variable is left as is and a warning is logged.

  Returns:
    A new list with every argument processed.

  Raises:
    ValueError if ${ISOLATED_OUTDIR} is requested in |command| but |out_dir|
    is not provided.
  """
  def substitute(arg):
    arg = arg.replace(EXECUTABLE_SUFFIX_PARAMETER, cipd.EXECUTABLE_SUFFIX)
    has_path_variable = False
    if ISOLATED_OUTDIR_PARAMETER in arg:
      if not out_dir:
        raise ValueError(
            'output directory is requested in command, but not provided; '
            'please specify one')
      arg = arg.replace(ISOLATED_OUTDIR_PARAMETER, out_dir)
      has_path_variable = True
    if SWARMING_BOT_FILE_PARAMETER in arg:
      if not bot_file:
        logging.warning('SWARMING_BOT_FILE_PARAMETER found in command, but no '
                        'bot_file specified. Leaving parameter unchanged.')
      else:
        arg = arg.replace(SWARMING_BOT_FILE_PARAMETER, bot_file)
        has_path_variable = True
    if not has_path_variable:
      return arg
    # Slashes are converted only when a path variable was substituted, because
    # of arguments like '${ISOLATED_OUTDIR}/foo/bar'.
    return arg.replace('/', os.sep)

  return list(map(substitute, command))
232
233
def get_command_env(tmp_dir, cipd_info):
  """Returns full OS environment to run a command in.

  Sets up the temp directory variable, puts the directory with the cipd binary
  in front of PATH, and exposes the CIPD_CACHE_DIR env var.

  Args:
    tmp_dir: temp directory.
    cipd_info: CipdInfo object if cipd client is used, None if not.

  Returns:
    A copy of os.environ with the variables above set; os.environ itself is
    left untouched.
  """
  def to_fs_enc(s):
    if isinstance(s, str):
      return s
    return s.encode(sys.getfilesystemencoding())

  env = os.environ.copy()

  # TMPDIR is specified as the POSIX standard envvar for the temp directory.
  #   * mktemp on linux respects $TMPDIR, not $TMP
  #   * mktemp on OS X SOMETIMES respects $TMPDIR
  #   * chromium's base utils respects $TMPDIR on linux, $TEMP on windows.
  #     Unfortunately at the time of writing it completely ignores all envvars
  #     on OS X.
  #   * python respects TMPDIR, TEMP, and TMP (regardless of platform)
  #   * golang respects TMPDIR on linux+mac, TEMP on windows.
  key = {'win32': 'TEMP'}.get(sys.platform, 'TMPDIR')
  env[key] = to_fs_enc(tmp_dir)

  if cipd_info:
    bin_dir = to_fs_enc(os.path.dirname(cipd_info.client.binary_path))
    current_path = env.get('PATH')
    if current_path:
      env['PATH'] = '%s%s%s' % (bin_dir, os.pathsep, current_path)
    else:
      # PATH is unset or empty. Do not fail with KeyError, and do not leave a
      # trailing separator behind, which would be read as an empty PATH entry.
      env['PATH'] = bin_dir
    env['CIPD_CACHE_DIR'] = to_fs_enc(cipd_info.cache_dir)

  return env
268
269
def run_command(command, cwd, env, hard_timeout, grace_period):
  """Runs the command and waits for it, enforcing the timeouts.

  The process is waited on for up to |hard_timeout| seconds. On timeout, or
  when one of subprocess42.STOP_SIGNALS is received, it is terminated and
  given |grace_period| seconds to exit before being killed.

  A command that cannot be started (OSError) is reported on stderr and
  results in an exit code of 1; it is not treated as an internal error.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)', command, cwd)

  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list is used as a mutable flag that the nested handler can update.
    got_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def on_stop_signal(signum, _frame):
        # Only the first signal received once the process exists is turned
        # into a timeout; the others are dropped.
        if proc and not got_signal:
          logging.info('Received signal %d', signum)
          got_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(
          subprocess42.STOP_SIGNALS, on_stop_signal):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          if not got_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        def ignore_signal(*_args):
          return None

        with subprocess42.set_signal_handler(
            subprocess42.STOP_SIGNALS, ignore_signal):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
        logging.info('Waiting for proces exit')
        exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      sys.stderr.write(
          '<The executable does not exist or a dependent library is missing>\n'
          '<Check for missing .so/.dll in the .isolate or GN file>\n'
          '<Command: %s>\n' % command)
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write(
            '<See the task\'s page for commands to help diagnose this issue '
            'by reproducing the task locally>\n')
      exit_code = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
maruela9cfd6f2015-09-15 11:03:15 -0700339
340
def fetch_and_map(isolated_hash, storage, cache, outdir, use_symlinks):
  """Fetches an isolated tree into |outdir| and returns (bundle, stats).

  |stats| is a dict with the duration of the fetch, the initial number of
  items and size reported by |cache|, and two base64 encoded large.pack()
  blobs: 'items_cold' built from cache.added and 'items_hot' built from the
  entries of cache.used that are not in cache.added.
  """
  started = time.time()
  bundle = isolateserver.fetch_isolated(
      isolated_hash=isolated_hash,
      storage=storage,
      cache=cache,
      outdir=outdir,
      use_symlinks=use_symlinks)
  stats = {}
  # The duration is taken first so that it does not include the packing below.
  stats['duration'] = time.time() - started
  stats['initial_number_items'] = cache.initial_number_items
  stats['initial_size'] = cache.initial_size
  cold_items = sorted(cache.added)
  stats['items_cold'] = base64.b64encode(large.pack(cold_items))
  hot_items = sorted(set(cache.used) - set(cache.added))
  stats['items_hot'] = base64.b64encode(large.pack(hot_items))
  return bundle, stats
358
359
def link_outputs_to_outdir(run_dir, out_dir, outputs):
  """Makes every named output of |run_dir| available in |out_dir| for upload.

  Symlinks are copied, anything else is linked with
  file_path.HARDLINK_WITH_FALLBACK. An output that cannot be collected
  because of an OSError is logged and skipped.
  """
  if not outputs:
    return
  isolateserver.create_directories(out_dir, outputs)
  for rel_path in outputs:
    src = os.path.join(run_dir, rel_path)
    dst = os.path.join(out_dir, rel_path)
    try:
      if not fs.islink(src):
        file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      else:
        # TODO(aludwin): handle directories
        fs.copy2(src, dst)
    except OSError as e:
      logging.info("Couldn't collect output file %s: %s", rel_path, e)
aludwin0a8e17d2016-10-27 15:57:39 -0700379
380
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Uploads the content of |out_dir| back, then deletes |out_dir|.

  The upload only happens when |out_dir| is a non-empty directory. The
  directory is kept when |leak_temp_dir| is set.

  Returns:
    tuple(outputs_ref, success, stats)
    - outputs_ref: a dict referring to the results archived back to the isolated
          server, if applicable.
    - success: False if something occurred that means that the task must
          forcibly be considered a failure, e.g. zombie processes were left
          behind.
    - stats: uploading stats.

  Raises:
    isolateserver.Aborted if the upload was aborted.
  """
  outputs_ref = None
  cold_sizes = []
  hot_sizes = []
  started = time.time()

  # Upload out_dir and generate a .isolated file out of this directory. It is
  # only done if files were written in the directory.
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold_sizes = sorted(item.size for item in f_cold)
        hot_sizes = sorted(item.size for item in f_hot)
      except isolateserver.Aborted:
        # This happens when a signal SIGTERM was received while uploading data.
        # There is 2 causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading')
        # Re-raise, so it will be treated as an internal failure.
        raise

  success = False
  try:
    # Nothing is left to remove when the directory is leaked on purpose, when
    # it does not exist, or once rmtree() reports that it removed it.
    cleaned = (
        leak_temp_dir or not fs.isdir(out_dir) or file_path.rmtree(out_dir))
    if cleaned:
      success = True
    else:
      logging.error('Had difficulties removing out_dir %s', out_dir)
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)

  stats = {
    'duration': time.time() - started,
    'items_cold': base64.b64encode(large.pack(cold_sizes)),
    'items_hot': base64.b64encode(large.pack(hot_sizes)),
  }
  return outputs_ref, success, stats
maruela9cfd6f2015-09-15 11:03:15 -0700439
440
def map_and_run(
    command, isolated_hash, storage, isolate_cache, outputs,
    install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
    bot_file, install_packages_fn, use_symlinks, constant_run_path):
  """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  |constant_run_path| selects where the command runs: when it is set and a
  root directory is known, the run directory is always
  <root_dir>/ISOLATED_RUN_DIR and is recreated from scratch; otherwise a
  uniquely named directory is created.

  Returns metadata about the result, as a dict with the keys 'duration',
  'exit_code', 'had_hard_timeout', 'internal_failure', 'stats', 'outputs_ref',
  'version' and, when cipd packages were installed, 'cipd_pins'.
  """
  assert isinstance(command, list), command
  assert root_dir or root_dir is None
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
      # 'isolated': {
      #   'cipd': {
      #     'duration': 0.,
      #     'get_client_duration': 0.,
      #   },
      #   'download': {
      #     'duration': 0.,
      #     'initial_number_items': 0,
      #     'initial_size': 0,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      #   'upload': {
      #     'duration': 0.,
      #     'items_cold': '<large.pack()>',
      #     'items_hot': '<large.pack()>',
      #   },
      # },
    },
    # 'cipd_pins': {
    #   'packages': [
    #     {'package_name': ..., 'version': ..., 'path': ...},
    #     ...
    #   ],
    #   'client_package': {'package_name': ..., 'version': ...},
    # },
    'outputs_ref': None,
    'version': 5,
  }

  if root_dir:
    # 0o700 is the octal spelling accepted by both Python 2.6+ and Python 3.
    file_path.ensure_tree(root_dir, 0o700)
  elif isolate_cache.cache_dir:
    root_dir = os.path.dirname(isolate_cache.cache_dir)
  # See comment for these constants.
  # If root_dir is not specified, it is not constant.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make the constant_run_path an exposed flag.
  if constant_run_path and root_dir:
    run_dir = os.path.join(root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
  # storage should be normally set but don't crash if it is not. This can happen
  # as Swarming task can run without an isolate server.
  out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
  cwd = run_dir

  try:
    with install_packages_fn(run_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      if isolated_hash:
        isolated_stats = result['stats'].setdefault('isolated', {})
        bundle, isolated_stats['download'] = fetch_and_map(
            isolated_hash=isolated_hash,
            storage=storage,
            cache=isolate_cache,
            outdir=run_dir,
            use_symlinks=use_symlinks)
        change_tree_read_only(run_dir, bundle.read_only)
        cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
        # Inject the command
        if bundle.command:
          command = bundle.command + command

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please secify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        # The cleanup in the finally clause below still runs.
        return result

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if storage and outputs:
        isolateserver.create_directories(run_dir, outputs)

      command = tools.fix_python_path(command)
      command = process_command(command, out_dir, bot_file)
      file_path.ensure_command_has_abs_path(command, cwd)

      with install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          result['exit_code'], result['had_hard_timeout'] = run_command(
              command, cwd, get_command_env(tmp_dir, cipd_info),
              hard_timeout, grace_period)
        finally:
          result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  # Clean up
  finally:
    try:
      # Try to link files to the output directory, if specified.
      if out_dir:
        link_outputs_to_outdir(run_dir, out_dir, outputs)

      success = False
      if leak_temp_dir:
        success = True
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows rmtree(run_dir) call below has a synchronization effect: it
        # finishes only when all task child processes terminate (since a running
        # process locks *.exe file). Examine out_dir only after that call
        # completes (since child processes may write to out_dir too and we need
        # to wait for them to finish).
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('run', grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('temp', grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(storage, out_dir, leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Marc-Antoine Ruel2283ad12014-02-09 11:14:57 -0500615
616
def run_tha_test(
    command, isolated_hash, storage, isolate_cache, outputs,
    install_named_caches, leak_temp_dir, result_json, root_dir, hard_timeout,
    grace_period, bot_file, install_packages_fn, use_symlinks):
  """Runs an executable and records execution metadata.

  Either command or isolated_hash must be specified.

  If isolated_hash is specified, downloads the dependencies in the cache,
  hardlinks them into a temporary directory and runs the command specified in
  the .isolated.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    command: a list of string; the command to run OR optional arguments to add
             to the command stated in the .isolated file if a command was
             specified.
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
                   The command specified in the .isolated is executed.
                   Mutually exclusive with command argument.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    isolate_cache: an isolateserver.LocalCache to keep from retrieving the
                   same objects constantly by caching the objects retrieved.
                   Can be on-disk or in-memory.
    outputs: extra outputs to return, as collected from the --output option;
             forwarded untouched to map_and_run.
    install_named_caches: a function (run_dir) => context manager that installs
                          named caches into |run_dir|.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.
    root_dir: path to the directory to use to create the temporary directory. If
              not specified, a random temporary directory is created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    bot_file: value of the --bot-file option (path to a file describing the
              state of the host); forwarded untouched to map_and_run.
    install_packages_fn: context manager dir => CipdInfo, see
                         install_client_and_packages.
    use_symlinks: create tree with symlinks instead of hardlinks.

  Returns:
    Process exit code that should be used.
  """
  if result_json:
    # Leave a pessimistic placeholder on disk immediately, so that something
    # meaningful is found there if this process gets killed mid-run.
    tools.write_json(
        result_json,
        {
          'exit_code': None,
          'had_hard_timeout': False,
          'internal_failure': 'Was terminated before completion',
          'outputs_ref': None,
          'version': 5,
        },
        dense=True)

  # The run_isolated exit code derived from |result| depends on whether
  # result_json is used or not; see the two return statements below.
  result = map_and_run(
      command, isolated_hash, storage, isolate_cache, outputs,
      install_named_caches, leak_temp_dir, root_dir, hard_timeout, grace_period,
      bot_file, install_packages_fn, use_symlinks, True)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))

  if result_json:
    # Some tests were found to delete 'work' when quitting, which made the
    # write below raise. Recreate the parent directory if it went missing.
    file_path.ensure_tree(os.path.dirname(result_json))
    tools.write_json(result_json, result, dense=True)
    # The task's own exit code travels through the json file; only an internal
    # error makes this process return 1.
    return int(bool(result['internal_failure']))

  # No json file requested: marshall into old-style inline output.
  outputs_ref = result['outputs_ref']
  if outputs_ref:
    payload = {
      'hash': outputs_ref['isolated'],
      'namespace': outputs_ref['namespace'],
      'storage': outputs_ref['isolatedserver'],
    }
    # Flush on both sides so the marker is not interleaved with other output.
    sys.stdout.flush()
    print(
        '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
        tools.format_json(payload, dense=True))
    sys.stdout.flush()
  return result['exit_code'] or int(bool(result['internal_failure']))
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +0000704
705
# Value yielded by 'install_client_and_packages'. Fields, in order:
#   client:    cipd.CipdClient object
#   cache_dir: absolute path to bot-global cipd tag and instance cache
#   stats:     dict with stats to return to the server
#   pins:      dict with installed cipd pins to return to the server
CipdInfo = collections.namedtuple(
    'CipdInfo', ('client', 'cache_dir', 'stats', 'pins'))
713
714
@contextlib.contextmanager
def noop_install_packages(_run_dir):
  """Stand-in for 'install_client_and_packages' when cipd is disabled.

  Ignores |_run_dir|, installs nothing and yields None where the real
  implementation would yield a CipdInfo.
  """
  yield
719
720
iannuccib58d10d2017-03-18 02:00:25 -0700721def _install_packages(run_dir, cipd_cache_dir, client, packages, timeout):
722 """Calls 'cipd ensure' for packages.
723
724 Args:
725 run_dir (str): root of installation.
726 cipd_cache_dir (str): the directory to use for the cipd package cache.
727 client (CipdClient): the cipd client to use
728 packages: packages to install, list [(path, package_name, version), ...].
729 timeout: max duration in seconds that this function can take.
730
731 Returns: list of pinned packages. Looks like [
732 {
733 'path': 'subdirectory',
734 'package_name': 'resolved/package/name',
735 'version': 'deadbeef...',
736 },
737 ...
738 ]
739 """
740 package_pins = [None]*len(packages)
741 def insert_pin(path, name, version, idx):
742 package_pins[idx] = {
743 'package_name': name,
744 # swarming deals with 'root' as '.'
745 'path': path or '.',
746 'version': version,
747 }
748
749 by_path = collections.defaultdict(list)
750 for i, (path, name, version) in enumerate(packages):
751 # cipd deals with 'root' as ''
752 if path == '.':
753 path = ''
754 by_path[path].append((name, version, i))
755
756 pins = client.ensure(
757 run_dir,
758 {
759 subdir: [(name, vers) for name, vers, _ in pkgs]
760 for subdir, pkgs in by_path.iteritems()
761 },
762 cache_dir=cipd_cache_dir,
763 timeout=timeout,
764 )
765
766 for subdir, pin_list in sorted(pins.iteritems()):
767 this_subdir = by_path[subdir]
768 for i, (name, version) in enumerate(pin_list):
769 insert_pin(subdir, name, version, this_subdir[i][2])
770
771 assert None not in package_pins
772
773 return package_pins
774
775
@contextlib.contextmanager
def install_client_and_packages(
    run_dir, packages, service_url, client_package_name,
    client_version, cache_dir, timeout=None):
  """Bootstraps CIPD client and installs CIPD packages.

  Yields CipdClient, stats, client info and pins (as single CipdInfo object).

  Pins and the CIPD client info are in the form of:
    [
      {
        "path": path, "package_name": package_name, "version": version,
      },
      ...
    ]
  (the CIPD client info is a single dictionary instead of a list)

  such that they correspond 1:1 to all input package arguments from the command
  line. These dictionaries make their way all the way back to swarming, where
  they become the arguments of CipdPackage.

  If 'packages' list is empty, will bootstrap CIPD client, but won't install
  any packages.

  The bootstrapped client (regardless whether 'packages' list is empty or not),
  will be made available to the task via $PATH.

  Args:
    run_dir (str): root of installation.
    packages: packages to install, list [(path, package_name, version), ...].
    service_url (str): CIPD server url, e.g.
        "https://chrome-infra-packages.appspot.com."
    client_package_name (str): CIPD package name of CIPD client.
    client_version (str): Version of CIPD client.
    cache_dir (str): where to keep cache of cipd clients, packages and tags.
    timeout: max duration in seconds that this function can take.
  """
  assert cache_dir

  timeoutfn = tools.sliding_timeout(timeout)
  began = time.time()

  cache_dir = os.path.abspath(cache_dir)
  run_dir = os.path.abspath(run_dir)
  # The tag and instance caches live in a subdirectory of the client cache.
  cipd_cache_dir = os.path.join(cache_dir, 'cache')

  client_began = time.time()
  with cipd.get_client(
      service_url, client_package_name, client_version, cache_dir,
      timeout=timeoutfn()) as client:
    get_client_duration = time.time() - client_began

    # An empty or missing package list only bootstraps the client.
    if packages:
      package_pins = _install_packages(
          run_dir, cipd_cache_dir, client, packages, timeoutfn())
    else:
      package_pins = []

    file_path.make_tree_files_read_only(run_dir)

    total_duration = time.time() - began
    logging.info(
        'Installing CIPD client and packages took %d seconds', total_duration)

    stats = {
      'duration': total_duration,
      'get_client_duration': get_client_duration,
    }
    pins = {
      'client_package': {
        'package_name': client.package_name,
        'version': client.instance_id,
      },
      'packages': package_pins,
    }
    # Yield while the client context is still open so that the caller can keep
    # using |client| for the whole duration of its own 'with' block.
    yield CipdInfo(
        client=client, cache_dir=cipd_cache_dir, stats=stats, pins=pins)
nodirbe642ff2016-06-09 15:51:51 -0700856
857
def clean_caches(options, isolate_cache, named_cache_manager):
  """Trims isolated and named caches.

  The goal here is to coherently trim both caches, deleting older items
  independent of which container they belong to.

  Args:
    options: parsed command line options; only min_free_space is read here and
             it is handed to named_cache_manager.trim().
    isolate_cache: the isolated cache; must provide get_oldest(),
                   get_timestamp(), trim() and cleanup().
    named_cache_manager: the named cache manager; must provide open(),
                         get_oldest(), get_timestamp() and trim().

  Returns:
    The sum of the values returned by both trim() calls.
  """
  # TODO(maruel): Trim CIPD cache the same way.
  total = 0
  with named_cache_manager.open():
    oldest_isolated = isolate_cache.get_oldest()
    oldest_named = named_cache_manager.get_oldest()
    # (trim callable, timestamp of the oldest item) per cache. A cache without
    # any item gets timestamp 0 and is therefore trimmed first.
    trimmers = [
      (
        isolate_cache.trim,
        isolate_cache.get_timestamp(oldest_isolated) if oldest_isolated else 0,
      ),
      (
        lambda: named_cache_manager.trim(options.min_free_space),
        named_cache_manager.get_timestamp(oldest_named) if oldest_named else 0,
      ),
    ]
    # Index-based key instead of a tuple-unpacking lambda: tuple parameters
    # were removed from the language by PEP 3113 and are a SyntaxError on
    # Python 3. The ordering (stable, by timestamp) is unchanged.
    trimmers.sort(key=lambda trimmer: trimmer[1])
    # TODO(maruel): This is incorrect, we want to trim 'items' that are strictly
    # the oldest independent of in which cache they live in. Right now, the
    # cache with the oldest item pays the price.
    for trim, _ in trimmers:
      total += trim()
  isolate_cache.cleanup()
  return total
nodirf33b8d62016-10-26 22:34:58 -0700887
888
def create_option_parser():
  """Builds the command line parser of run_isolated.

  Registers, in order: the general options, the 'Data source' group (including
  the isolate server options), the cache, CIPD and named cache options, the
  'Debugging' group and the authentication options. The options that are not
  declared inline are added by the respective modules (isolateserver, cipd,
  named_cache, auth).

  Returns:
    A logging_utils.OptionParserWithLogging instance with defaults set for
    cache, cipd_cache and named_cache_root.
  """
  parser = logging_utils.OptionParserWithLogging(
      usage='%prog <options> [command to run or extra args]',
      version=__version__,
      log_file=RUN_ISOLATED_LOG_FILE)
  parser.add_option(
      '--clean', action='store_true',
      help='Cleans the cache, trimming it if necessary and removing corrupted '
           'items, and returns without executing anything; use with -v to '
           'know what was done')
  parser.add_option(
      '--no-clean', action='store_true',
      help='Do not clean the cache automatically on startup. This is meant for '
           'bots where a separate execution with --clean was done earlier so '
           'doing it again is redundant')
  parser.add_option(
      '--use-symlinks', action='store_true',
      help='Use symlinks instead of hardlinks')
  parser.add_option(
      '--json',
      help='dump output metadata to json file. When used, run_isolated returns '
           'non-zero only on internal failure')
  parser.add_option(
      '--hard-timeout', type='float', help='Enforce hard timeout in execution')
  parser.add_option(
      '--grace-period', type='float',
      help='Grace period between SIGTERM and SIGKILL')
  parser.add_option(
      '--bot-file',
      help='Path to a file describing the state of the host. The content is '
           'defined by on_before_task() in bot_config.')
  # The placeholder is spelled ${ISOLATED_OUTDIR} (curly braces), as in the
  # module docstring; the help text must show the spelling users have to type.
  parser.add_option(
      '--output', action='append',
      help='Specifies an output to return. If no outputs are specified, all '
           'files located in ${ISOLATED_OUTDIR} will be returned; '
           'otherwise, outputs in both ${ISOLATED_OUTDIR} and those '
           'specified by --output option (there can be multiple) will be '
           'returned. Note that if a file in OUT_DIR has the same path '
           'as an --output option, the --output version will be returned.')
  parser.add_option(
      '-a', '--argsfile',
      # This is actually handled in parse_args; it's included here purely so it
      # can make it into the help text.
      help='Specify a file containing a JSON array of arguments to this '
           'script. If --argsfile is provided, no other argument may be '
           'provided on the command line.')

  data_group = optparse.OptionGroup(parser, 'Data source')
  data_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server.')
  isolateserver.add_isolate_server_options(data_group)
  parser.add_option_group(data_group)

  isolateserver.add_cache_options(parser)

  cipd.add_cipd_options(parser)
  named_cache.add_named_cache_options(parser)

  debug_group = optparse.OptionGroup(parser, 'Debugging')
  debug_group.add_option(
      '--leak-temp-dir',
      action='store_true',
      help='Deliberately leak isolate\'s temp dir for later examination. '
           'Default: %default')
  debug_group.add_option(
      '--root-dir', help='Use a directory instead of a random one')
  parser.add_option_group(debug_group)

  auth.add_auth_options(parser)

  parser.set_defaults(
      cache='cache',
      cipd_cache='cipd_cache',
      named_cache_root='named_caches')
  return parser
964
965
def parse_args(args):
  """Parses the command line, expanding --argsfile if it is used.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    Tuple (parser, options, args) where parser is the parser returned by
    create_option_parser() and (options, args) is what it parsed.

  Exits the process (through the argparse error handler) if --argsfile is mixed
  with any other argument.
  """
  # Create a fake mini-parser just to get out the "-a" command. Note that
  # it's not documented here; instead, it's documented in create_option_parser
  # even though that parser will never actually get to parse it. This is
  # because --argsfile is exclusive with all other options and arguments.
  file_argparse = argparse.ArgumentParser(add_help=False)
  file_argparse.add_argument('-a', '--argsfile')
  (file_args, nonfile_args) = file_argparse.parse_known_args(args)
  if file_args.argsfile:
    if nonfile_args:
      file_argparse.error(
          'Can\'t specify --argsfile with any other arguments (%s)' %
          nonfile_args)
    try:
      with open(file_args.argsfile, 'r') as f:
        loaded = json.load(f)
      # Anything but a JSON array would make the option parser below blow up
      # with an unrelated exception; report it like any other unreadable file.
      if not isinstance(loaded, list):
        raise ValueError(
            'expected a JSON array, got %s' % type(loaded).__name__)
      args = loaded
    except (IOError, OSError, ValueError) as e:
      # We don't need to error out here - "args" is left untouched, i.e. it
      # only holds the --argsfile option, so no command is found in it and
      # main() reports the usage error.
      sys.stderr.write('Couldn\'t read arguments: %s\n' % e)

  # Even if we failed to read the args, just call the normal parser now since it
  # will print the correct help message.
  parser = create_option_parser()
  options, args = parser.parse_args(args)
  return (parser, options, args)
992
993
def main(args):
  """Entry point: parses |args|, maintains the caches and runs the task.

  Args:
    args: list of command line arguments, without the program name.

  Returns:
    The process exit code: 0 once the caches were cleaned when --clean is
    used, 1 if a cipd.Error or named_cache.Error is raised while running,
    otherwise the value returned by run_tha_test().
  """
  (parser, options, args) = parse_args(args)

  isolate_cache = isolateserver.process_cache_options(options, trim=False)
  named_cache_manager = named_cache.process_named_cache_options(parser, options)
  if options.clean:
    # --clean is a standalone mode; refuse the options that imply running
    # something.
    if options.isolated:
      parser.error('Can\'t use --isolated with --clean.')
    if options.isolate_server:
      parser.error('Can\'t use --isolate-server with --clean.')
    if options.json:
      parser.error('Can\'t use --json with --clean.')
    if options.named_caches:
      parser.error('Can\'t use --named-cache with --clean.')
    clean_caches(options, isolate_cache, named_cache_manager)
    return 0

  if not options.no_clean:
    clean_caches(options, isolate_cache, named_cache_manager)

  if not options.isolated and not args:
    parser.error('--isolated or command to run is required.')

  auth.process_auth_options(parser, options)

  isolateserver.process_isolate_server_options(
      parser, options, True, False)
  if not options.isolate_server:
    if options.isolated:
      parser.error('--isolated requires --isolate-server')
    if ISOLATED_OUTDIR_PARAMETER in args:
      parser.error(
          '%s in args requires --isolate-server' % ISOLATED_OUTDIR_PARAMETER)

  if options.root_dir:
    options.root_dir = unicode(os.path.abspath(options.root_dir))
  if options.json:
    options.json = unicode(os.path.abspath(options.json))

  cipd.validate_cipd_options(parser, options)

  install_packages_fn = noop_install_packages
  if options.cipd_enabled:
    install_packages_fn = lambda run_dir: install_client_and_packages(
        run_dir, cipd.parse_package_args(options.cipd_packages),
        options.cipd_server, options.cipd_client_package,
        options.cipd_client_version, cache_dir=options.cipd_cache)

  @contextlib.contextmanager
  def install_named_caches(run_dir):
    # WARNING: this function depends on "options" variable defined in the outer
    # function.
    caches = [
      (os.path.join(run_dir, unicode(relpath)), name)
      for name, relpath in options.named_caches
    ]
    with named_cache_manager.open():
      for path, name in caches:
        named_cache_manager.install(path, name)
    try:
      yield
    finally:
      # Always hand the caches back, even if the body raised.
      with named_cache_manager.open():
        for path, name in caches:
          named_cache_manager.uninstall(path, name)

  def run(storage):
    # Single place for the long run_tha_test() call; the only thing that
    # differs between the two call sites below is |storage|.
    return run_tha_test(
        args,
        options.isolated,
        storage,
        isolate_cache,
        options.output,
        install_named_caches,
        options.leak_temp_dir,
        options.json,
        options.root_dir,
        options.hard_timeout,
        options.grace_period,
        options.bot_file,
        install_packages_fn,
        options.use_symlinks)

  try:
    if options.isolate_server:
      storage = isolateserver.get_storage(
          options.isolate_server, options.namespace)
      with storage:
        # Hashing schemes used by |storage| and |isolate_cache| MUST match.
        assert storage.hash_algo == isolate_cache.hash_algo
        return run(storage)
    return run(None)
  except (cipd.Error, named_cache.Error) as ex:
    print >> sys.stderr, ex.message
    return 1
maruel@chromium.org9c72d4e2012-09-28 19:20:25 +00001099
1100
if __name__ == '__main__':
  # Process-wide setup; all of it happens before main() runs.
  subprocess42.inhibit_os_error_reporting()
  # Ensure that we are always running with the correct encoding.
  fix_encoding.fix_encoding()
  file_path.enable_symlink()

  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
Marc-Antoine Ruel90c98162013-12-18 15:11:57 -05001107 sys.exit(main(sys.argv[1:]))