blob: 840e2a99cee833381b46124e9704f122a5d27afa [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -04008__version__ = '0.4.11'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040010import datetime
Marc-Antoine Ruel5b475782014-02-14 20:57:59 -050011import getpass
maruel@chromium.org0437a732013-08-27 16:05:52 +000012import hashlib
13import json
14import logging
15import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070016import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import shutil
maruel@chromium.org0437a732013-08-27 16:05:52 +000018import subprocess
19import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070020import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000021import time
22import urllib
maruel@chromium.org0437a732013-08-27 16:05:52 +000023
24from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000027
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050028from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040029from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040031from utils import on_error
maruel@chromium.org0437a732013-08-27 16:05:52 +000032from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000033from utils import tools
34from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000035
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080036import auth
maruel@chromium.org7b844a62013-09-17 13:04:59 +000037import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000038import run_isolated
39
40
# Directory holding this script; helper scripts live in its 'tools' subdir.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(ROOT_DIR, 'tools')


# Default number of seconds to wait for a shard to finish running (80 min).
DEFAULT_SHARD_WAIT_TIME = 60. * 80

# Number of seconds between status updates printed to stdout in 'collect'
# (15 min).
STATUS_UPDATE_INTERVAL = 60. * 15


# Text standing in for the output of a task that did not produce any.
NO_OUTPUT_FOUND = (
    'No output produced by the task, it may have failed to run.\n\n')
55
56
class Failure(Exception):
  """Generic error raised by this tool, e.g. when no task can be found."""
60
61
class Manifest(object):
  """Definition of a single Swarming task request.

  Accumulates the commands to run (add_task) and the files to ship along
  (add_bundled_file), then serializes everything with to_json().
  """

  def __init__(
      self, isolate_server, namespace, isolated_hash, task_name, extra_args,
      env, dimensions, deadline, verbose, profile,
      priority):
    """Populates a manifest object.

    Args:
      isolate_server - isolate server url.
      namespace - isolate server namespace to use.
      isolated_hash - the manifest's sha-1 that the slave is going to fetch.
      task_name - the name to give the task request.
      extra_args - additional arguments to pass to isolated command; may be
          None or empty.
      env - dict of environment variables to set; copied.
      dimensions - dict of dimensions to filter the task on; copied.
      deadline - maximum pending time before this task expires.
      verbose - if True, have the slave print more details.
      profile - if True, have the slave print more timing data.
      priority - int between 0 and 1000, lower the higher priority.
    """
    # Where the isolated payload lives.
    self.isolate_server = isolate_server
    self.namespace = namespace
    self.isolated_hash = isolated_hash

    # What to run and how.
    self.task_name = task_name
    self.extra_args = tuple(extra_args) if extra_args else ()
    # Shallow copies, so later changes by the caller do not leak in.
    self.env = env.copy()
    self.dimensions = dimensions.copy()
    self.deadline = deadline
    self.verbose = bool(verbose)
    self.profile = bool(profile)
    self.priority = priority

    # Filled in by add_task() and add_bundled_file().
    self._tasks = []
    self._files = []

  def add_task(self, task_name, actions, time_out=2*60*60):
    """Appends a new task as a TestObject to the swarming manifest file.

    Tasks cannot be added once the manifest was uploaded.

    By default, command will be killed after 2 hours of execution.

    See TestObject in services/swarming/src/common/test_request_message.py for
    the valid format.
    """
    test_object = {
      'action': actions,
      'decorate_output': self.verbose,
      'hard_time_out': time_out,
      'test_name': task_name,
    }
    self._tasks.append(test_object)

  def add_bundled_file(self, file_name, file_url):
    """Appends a file to the manifest.

    File will be downloaded and extracted by the swarm bot before launching
    the task. Entries are stored as [url, name] pairs.
    """
    entry = [file_url, file_name]
    self._files.append(entry)

  def to_json(self):
    """Exports the current configuration into a swarm-readable manifest file.

    The actual serialization format is defined as a TestCase object as
    described in services/swarming/src/common/test_request_message.py

    Returns:
      Compact JSON string with sorted keys.
    """
    # Is a TestConfiguration.
    configuration = {
      'config_name': 'isolated',
      'deadline_to_run': self.deadline,
      'dimensions': self.dimensions,
      'priority': self.priority,
    }
    request = {
      'cleanup': 'root',
      'configurations': [configuration],
      'data': self._files,
      'env_vars': self.env,
      'test_case_name': self.task_name,
      'tests': self._tasks,
    }
    return json.dumps(request, sort_keys=True, separators=(',',':'))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000146
Marc-Antoine Ruelcd629732013-12-20 15:00:42 -0500147
class TaskOutputCollector(object):
  """Fetches task output from isolate server to local disk.

  A single instance is shared by the threads running 'retrieve_results';
  they call 'process_shard_result' in parallel, hence the internal lock.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # Guards _per_shard_results and _storage.
    self._lock = threading.Lock()
    # Maps shard index -> result dict.
    self._per_shard_results = {}
    # Created lazily by _get_storage().
    self._storage = None

    if not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Called concurrently from multiple threads.

    Args:
      shard_index: int index of the shard; out of range indexes are dropped.
      result: result dict of the shard; its 'output' value is scanned for
          the location of output files.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if not 0 <= shard_index < self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Keep only the first result seen for each shard.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Nothing more to do unless the log points at output files.
    location = extract_output_files_location(result['output'])
    if not location:
      return
    isolate_server, namespace, isolated_hash = location
    storage = self._get_storage(isolate_server, namespace)
    if not storage:
      return

    # Output files are supposed to be small and they are not reused across
    # tasks. So use MemoryCache for them instead of on-disk cache. Make
    # files writable, so that calling script can delete them.
    cache = isolateserver.MemoryCache(file_mode_mask=0o700)
    destination = os.path.join(self.task_output_dir, str(shard_index))
    isolateserver.fetch_isolated(
        isolated_hash, storage, cache, destination, False)

  def finalize(self):
    """Writes summary.json, shutdowns underlying Storage."""
    with self._lock:
      # One entry per expected shard, None for the shards that are missing.
      shards = [
        self._per_shard_results.get(index)
        for index in xrange(self.shard_count)
      ]
      summary = {
        'shards': shards,
        'task_name': self.task_name,
      }
      summary_path = os.path.join(self.task_output_dir, 'summary.json')
      tools.write_json(summary_path, summary, False)
      if self._storage:
        self._storage.close()
        self._storage = None

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    The storage is created on first use. Returns None, after logging an
    error, when |isolate_server| or |namespace| differs from the ones the
    existing storage was created with.
    """
    with self._lock:
      storage = self._storage
      if not storage:
        storage = isolateserver.get_storage(isolate_server, namespace)
        self._storage = storage
        return storage

      # Shards must all use exact same isolate server and namespace.
      if storage.location != isolate_server:
        logging.error(
            'Task shards are using multiple isolate servers: %s and %s',
            storage.location, isolate_server)
        return None
      if storage.namespace != namespace:
        logging.error(
            'Task shards are using multiple namespaces: %s and %s',
            storage.namespace, namespace)
        return None
      return storage
246
247
def now():
  """Returns time.time(); a separate function so it can be mocked easily."""
  current = time.time()
  return current
251
252
def get_task_keys(swarm_base_url, task_name):
  """Returns the Swarming task key for each shards of task_name.

  Queries '<swarm_base_url>/get_matching_test_cases' and retries, within the
  attempts allowed by net.retry_loop, while the server answers that nothing
  matches.

  Raises:
    Failure if the request fails or no matching task shows up in time.
  """
  def not_found():
    return Failure(
        'Error: Unable to find any task with the name, %s, on swarming server'
        % task_name)

  query = urllib.urlencode([('name', task_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, query)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    response = net.url_read(url, retry_404=True)
    if response is None:
      raise not_found()

    # TODO(maruel): Compare exact string.
    if 'No matching' not in response:
      return json.loads(response)
    logging.warning('Unable to find any task with the name, %s, on swarming '
                    'server' % task_name)

  raise not_found()
maruel@chromium.org0437a732013-08-27 16:05:52 +0000275
276
def extract_output_files_location(task_log):
  """Task log -> location of task output files to fetch.

  Looks for a JSON dict wrapped in [run_isolated_out_hack] tags and reads
  its 'hash', 'namespace' and 'storage' string values.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not found:
    return None
  payload = found.group(1)

  def to_ascii(val):
    # Only strings are acceptable; non-ASCII content fails in encode() with
    # a ValueError subclass.
    if isinstance(val, basestring):
      return val.encode('ascii')
    raise ValueError()

  try:
    data = json.loads(payload)
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash = to_ascii(data['hash'])
    namespace = to_ascii(data['namespace'])
    isolate_server = to_ascii(data['storage'])
    if not file_path.is_url(isolate_server):
      raise ValueError()
  except (KeyError, ValueError):
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', payload)
    return None
  return (isolate_server, namespace, isolated_hash)
313
314
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Retrieves results for a single task_key.

  Polls '<base_url>/get_result' until the response carries a non-empty
  'output' value, the timeout expires or |should_stop| gets set.

  Args:
    base_url: URL prefix of the server to poll.
    shard_index: index handed to |output_collector| along with the result.
    task_key: key sent as the 'r' query parameter.
    timeout: float, seconds to keep polling; a false value means no deadline.
    should_stop: event; polling is abandoned once it is set.
    output_collector: optional object whose process_shard_result() is called
        with the finished result.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  query = urllib.urlencode([('r', task_key)])
  result_url = '%s/get_result?%s' % (base_url, query)
  started = now()
  deadline = None
  if timeout:
    deadline = started + timeout
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling. Never sleep past the deadline.
    if attempt > 1:
      delay = min(15, 1 + (current_time - started) / 30.0)
      if deadline:
        delay = min(delay, deadline - current_time)
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read, since we are doing retries
    # ourselves. Do not use retry_404 so should_stop is polled more often.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)

    # Request failed. Try again.
    if response is None:
      continue

    # Got some response, ensure it is JSON dict, retry if not.
    try:
      result = json.loads(response) or {}
      is_valid = isinstance(result, dict)
    except (ValueError, TypeError):
      is_valid = False
    if not is_valid:
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, response)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if not result.get('output'):
      continue

    # Record the result, try to fetch attached output files (if any).
    if output_collector:
      # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
      output_collector.process_shard_result(shard_index, result)
    return result

  return None
maruel@chromium.org0437a732013-08-27 16:05:52 +0000379
380
def yield_results(
    swarm_base_url, task_keys, timeout, max_threads,
    print_status_updates, output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  print_status_updates, when true, makes the function print the list of
  shards still being waited for every STATUS_UPDATE_INTERVAL seconds.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  shard_total = len(task_keys)
  number_threads = shard_total
  if max_threads:
    number_threads = min(max_threads, shard_total)
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_key):
        def task_fn(*args):
          return (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn),
            swarm_base_url, shard_index, task_key, timeout,
            should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for position, key in enumerate(task_keys):
        enqueue_retrieve_results(position, key)

      # Wait for all of them to finish.
      shards_remaining = list(range(shard_total))
      active_task_count = shard_total
      while active_task_count:
        shard_index = None
        result = None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # Nothing finished during the interval, keep waiting.
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          # The task itself raised; it is accounted for as a failure below.
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
453
454
def setup_run_isolated(manifest, bundle):
  """Sets up the manifest to run an isolated task via run_isolated.py.

  Modifies |bundle| (by adding files) and |manifest| (by adding commands) in
  place. Two tasks are added: 'Run Test' and 'Clean Up'.

  Args:
    manifest: Manifest with swarm task definition.
    bundle: ZipPackage with files that would be transfered to swarm bot.
        If None, only |manifest| is modified (useful in tests).
  """
  run_test_name = 'run_isolated.zip'
  cleanup_script_name = 'swarm_cleanup.py'

  if bundle:
    # Add uncompressed zip here. It'll be compressed as part of the package
    # sent to Swarming server.
    if run_test_name not in bundle.files:
      bundle.add_buffer(
          run_test_name,
          run_isolated.get_as_zip_package().zip_into_buffer(compress=False))
    if cleanup_script_name not in bundle.files:
      bundle.add_file(
          os.path.join(TOOLS_PATH, cleanup_script_name), cleanup_script_name)

  # A URL is handed over as the isolate server, anything else as --indir.
  if file_path.is_url(manifest.isolate_server):
    location_flag = '--isolate-server'
  else:
    location_flag = '--indir'

  run_cmd = [
    'python', run_test_name,
    '--hash', manifest.isolated_hash,
    '--namespace', manifest.namespace,
    location_flag, manifest.isolate_server,
  ]

  if manifest.verbose or manifest.profile:
    # Have it print the profiling section.
    run_cmd.append('--verbose')

  # Pass all extra args for run_isolated.py, it will pass them to the command.
  if manifest.extra_args:
    run_cmd += ['--']
    run_cmd += manifest.extra_args

  manifest.add_task('Run Test', run_cmd)

  # Clean up
  manifest.add_task('Clean Up', ['python', cleanup_script_name])
502
503
def setup_googletest(env, shards, index):
  """Sets googletest specific environment variables.

  Returns |env| itself when there is a single shard, otherwise a copy of it
  with GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS added as strings.
  """
  if not shards > 1:
    return env
  sharded_env = env.copy()
  sharded_env.update({
    'GTEST_SHARD_INDEX': str(index),
    'GTEST_TOTAL_SHARDS': str(shards),
  })
  return sharded_env
511
512
def archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the CAC.

  Runs isolate.py in a subprocess: the 'archive' command when
  |isolate_server| is a URL, the 'hashtable' command (with |isolate_server|
  used as --outdir) otherwise.

  Args:
    isolate_server: isolate server URL, or a local directory.
    namespace: isolate server namespace to use.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm handed to isolateserver.hash_file.
    verbose: int, number of times '--verbose' is passed to isolate.py.

  Returns:
    Hash of the |isolated| file on success, None if isolate.py failed.
  """
  logging.info('archive(%s, %s, %s)', isolate_server, namespace, isolated)
  if file_path.is_url(isolate_server):
    command = 'archive'
    flag = '--isolate-server'
  else:
    command = 'hashtable'
    flag = '--outdir'

  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    command,
    flag, isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Only the command goes to subprocess.call: a second positional argument
  # would be taken as Popen's bufsize, not as a verbosity setting.
  if subprocess.call(cmd):
    return None
  return isolateserver.hash_file(isolated, algo)
542
543
def get_shard_task_name(task_name, shards, index):
  """Returns a task name to use for a single shard of a task.

  The name is left as is for a single shard task, otherwise it becomes
  '<task_name>:<shards>:<index>'.
  """
  if shards != 1:
    return '%s:%s:%s' % (task_name, shards, index)
  return task_name
549
550
def upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to isolate storage and returns raw fetch URL.

  Args:
    isolate_server: URL of an isolate server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from.
  """
  # Swarming bot would need to be able to grab the file from the storage
  # using raw HTTP GET. Use 'default' namespace so that the raw data returned
  # to a bot is not zipped, since swarm_bot doesn't understand compressed
  # data yet. This namespace have nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  started = now()
  zipped = bundle.zip_into_buffer()
  item = isolateserver.BufferItem(zipped, high_priority=True)
  with isolateserver.get_storage(isolate_server, 'default') as storage:
    uploaded = storage.upload_items([item])
    bundle_url = storage.get_fetch_url(item)
  elapsed = now() - started

  # The item is only reported back when it was actually pushed.
  if item in uploaded:
    message = 'Upload complete, time elapsed: %f'
  else:
    message = 'Zip file already on server, time elapsed: %f'
  logging.info(message, elapsed)
  return bundle_url
579
580
def trigger_by_manifest(swarming, manifest):
  """Given a task manifest, triggers it for execution on swarming.

  Posts the serialized manifest to '<swarming>/test' and parses the JSON
  response.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    tuple(Task id, priority) on success. tuple(None, None) on failure.
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  result = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not result:
    on_error.report('Failed to trigger task %s' % manifest.task_name)
    # Always a 2-tuple, so that callers can unpack the result on every path.
    return None, None
  try:
    data = json.loads(result)
  except (ValueError, TypeError):
    msg = '\n'.join((
        'Failed to trigger task %s' % manifest.task_name,
        'Manifest: %s' % manifest_text,
        'Bad response: %s' % result))
    on_error.report(msg)
    return None, None
  if not data:
    return None, None
  return data['test_keys'][0]['test_key'], data['priority']
Vadim Shtayurab450c602014-05-12 19:23:25 -0700609
610
def abort_task(_swarming, _manifest):
  """Given a task manifest that was triggered, aborts its execution.

  Currently a no-op, both arguments are ignored.
  """
  # TODO(vadimsh): Not supported by the server yet.
  return None
614
615
def trigger_task_shards(
    swarming, isolate_server, namespace, isolated_hash, task_name, extra_args,
    shards, dimensions, env, deadline, verbose, profile, priority):
  """Triggers multiple subtasks of a sharded task.

  Builds one Manifest per shard, uploads the shared bootstrap zip bundle once,
  attaches its URL to every manifest and then triggers the shards in order.
  Triggering stops at the first shard that fails; shards that were already
  triggered are then passed to abort_task().

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    It maps each shard task name to a dict with 'shard_index', 'task_id' and
    'view_url' keys.
    None in case of failure.
  """
  # Collects all files that are necessary to bootstrap a task execution
  # on the bot. Usually it includes self contained run_isolated.zip and
  # a bunch of small other scripts. All heavy files are pulled
  # by run_isolated.zip. Updated in 'setup_run_isolated'.
  bundle = zip_package.ZipPackage(ROOT_DIR)

  # Make a separate Manifest for each shard, put shard index and number of
  # shards into env and subtask name.
  manifests = []
  for index in xrange(shards):
    manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=get_shard_task_name(task_name, shards, index),
        extra_args=extra_args,
        dimensions=dimensions,
        env=setup_googletest(env, shards, index),
        deadline=deadline,
        verbose=verbose,
        profile=profile,
        priority=priority)
    setup_run_isolated(manifest, bundle)
    manifests.append(manifest)

  # Upload zip bundle file to get its URL.
  try:
    bundle_url = upload_zip_bundle(isolate_server, bundle)
  except (IOError, OSError):
    on_error.report('Failed to upload the zip file for task %s' % task_name)
    # Must be a plain None: callers test the result with 'if not tasks' and a
    # (None, None) tuple is truthy, which would hide the failure.
    return None

  # Attach that file to all manifests.
  for manifest in manifests:
    manifest.add_bundled_file('swarm_data.zip', bundle_url)

  # Trigger all the subtasks.
  tasks = {}
  priority_warning = False
  for index, manifest in enumerate(manifests):
    triggered = trigger_by_manifest(swarming, manifest)
    # Accept both a bare None and a (None, None) pair as the failure signal so
    # that unpacking never raises TypeError.
    if not triggered:
      break
    task_id, actual_priority = triggered
    if not task_id:
      break
    # Warn only once even if the server resets the priority of every shard.
    if not priority_warning and actual_priority != manifest.priority:
      priority_warning = True
      print >> sys.stderr, 'Priority was reset to %s' % actual_priority
    tasks[manifest.task_name] = {
      'shard_index': index,
      'task_id': task_id,
      'view_url': '%s/user/task/%s' % (swarming, task_id),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(manifests):
    if tasks:
      print >> sys.stderr, 'Not all shards were triggered'
      for task_dict in tasks.itervalues():
        abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
maruel@chromium.org0437a732013-08-27 16:05:52 +0000686
687
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Resolves |arg| to the hash of an isolated file, archiving it if needed.

  |arg| is either a path ending with '.isolated', in which case it is archived
  through archive(), or a hash that must be valid for |algo|.

  Returns:
    tuple(hash, is_file). The hash is None on failure (the failure is reported
    through on_error.report). is_file is True when |arg| was treated as a
    .isolated file and False when it was treated as a hash.
  """
  if arg.endswith('.isolated'):
    digest = archive(isolate_server, namespace, arg, algo, verbose)
    if digest:
      return digest, True
    on_error.report('Archival failure %s' % arg)
    return None, True

  if not isolateserver.is_valid_hash(arg, algo):
    on_error.report('Invalid hash %s' % arg)
    return None, False
  return arg, False
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500705
706
def trigger(
    swarming,
    isolate_server,
    namespace,
    file_hash_or_isolated,
    task_name,
    extra_args,
    shards,
    dimensions,
    env,
    deadline,
    verbose,
    profile,
    priority):
  """Sends off the hash swarming task requests.

  Resolves |file_hash_or_isolated| to an isolated hash, generates a task name
  when none was given and triggers every shard via trigger_task_shards().

  Returns:
    tuple(tasks, base task name). |tasks| is the dict returned by
    trigger_task_shards(), or None in case of failure. The base task name is
    '' when the isolated hash could not be determined.
  """
  file_hash, is_file = isolated_to_hash(
      isolate_server, namespace, file_hash_or_isolated, hashlib.sha1, verbose)
  if not file_hash:
    # The first item must be falsy: callers detect failure with
    # 'if not tasks', so returning 1 here would be treated as success.
    return None, ''
  if not task_name:
    # If a file name was passed, use its base name of the isolated hash.
    # Otherwise, use user name as an approximation of a task name.
    if is_file:
      key = os.path.splitext(os.path.basename(file_hash_or_isolated))[0]
    else:
      key = getpass.getuser()
    task_name = '%s/%s/%s/%d' % (
        key,
        '_'.join('%s=%s' % (k, v) for k, v in sorted(dimensions.iteritems())),
        file_hash,
        now() * 1000)

  tasks = trigger_task_shards(
      swarming=swarming,
      isolate_server=isolate_server,
      namespace=namespace,
      isolated_hash=file_hash,
      task_name=task_name,
      extra_args=extra_args,
      shards=shards,
      dimensions=dimensions,
      deadline=deadline,
      env=env,
      verbose=verbose,
      profile=profile,
      priority=priority)
  return tasks, task_name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000759
760
def decorate_shard_output(shard_index, result, shard_exit_code):
  """Returns wrapped output for swarming task shard.

  Args:
    shard_index: index of the shard, shown in the banner.
    result: dict describing the shard result. 'machine_id' and 'output' keys
        are required, 'machine_tag' is optional and shown as 'unknown' when
        absent. A falsy 'output' is replaced with NO_OUTPUT_FOUND.
    shard_exit_code: integer exit code of the shard; printed both as decimal
        and as an unsigned 32 bits hexadecimal value.

  Returns:
    The shard output surrounded by begin/end banners, as a single string.
  """
  # The values must follow the order of the labels in the format string:
  # the machine tag first, the machine id second.
  tag = 'index %s (machine tag: %s, id: %s)' % (
      shard_index,
      result.get('machine_tag', 'unknown'),
      result['machine_id'])
  return (
    '\n'
    '================================================================\n'
    'Begin output from shard %s\n'
    '================================================================\n'
    '\n'
    '%s'
    '================================================================\n'
    'End output from shard %s.\nExit code %d (%s).\n'
    '================================================================\n') % (
        tag, result['output'] or NO_OUTPUT_FOUND, tag,
        shard_exit_code, hex(0xffffffff & shard_exit_code))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000779
780
def collect(
    url, task_name, shards, timeout, decorate,
    print_status_updates, task_output_dir):
  """Retrieves results of a Swarming task.

  Looks up the task key of every shard, then prints the output of each shard
  as yielded by yield_results(). When |task_output_dir| is set, a
  TaskOutputCollector is passed to yield_results() and finalized at the end,
  even when an exception is raised.

  Returns:
    0 if all shards reported a result and all their exit codes are zero,
    1 otherwise.

  Raises:
    Failure: if a shard doesn't have exactly one task key.
  """
  # Grab task keys for each shard. Order is important, used to figure out
  # shard index based on the key.
  # TODO(vadimsh): Simplify this once server support is added.
  task_keys = []
  for index in xrange(shards):
    shard_task_name = get_shard_task_name(task_name, shards, index)
    logging.info('Collecting %s', shard_task_name)
    shard_task_keys = get_task_keys(url, shard_task_name)
    if not shard_task_keys:
      raise Failure('No task keys to get results with: %s' % shard_task_name)
    if len(shard_task_keys) != 1:
      raise Failure('Expecting only one shard for a task: %s' % shard_task_name)
    task_keys.append(shard_task_keys[0])

  # Collect output files only if explicitly asked with --task-output-dir option.
  if task_output_dir:
    output_collector = TaskOutputCollector(
        task_output_dir, task_name, len(task_keys))
  else:
    output_collector = None

  seen_shards = set()
  exit_codes = []

  try:
    for index, output in yield_results(
        url, task_keys, timeout, None, print_status_updates, output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. A shard
      # without any exit code is considered failed.
      shard_exit_code = 0
      for code in map(int, (output['exit_codes'] or '1').split(',')):
        if code:
          shard_exit_code = code
          break
      exit_codes.append(shard_exit_code)

      if decorate:
        print(decorate_shard_output(index, output, shard_exit_code))
      else:
        # Same fallbacks as decorate_shard_output(): a missing machine tag or
        # an empty output must not make the undecorated mode crash.
        print(
            '%s/%s: %s' % (
                output['machine_id'],
                output.get('machine_tag', 'unknown'),
                output['exit_codes']))
        shard_output = output['output'] or NO_OUTPUT_FOUND
        print(''.join(' %s\n' % l for l in shard_output.splitlines()))
  finally:
    if output_collector:
      output_collector.finalize()

  if len(seen_shards) != len(task_keys):
    missing_shards = [x for x in range(len(task_keys)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return int(any(exit_codes))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000842
843
def add_filter_options(parser):
  """Adds the 'Filtering slaves' option group to |parser|.

  The group is also stored as parser.filter_group so that callers can append
  more options to it.
  """
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  parser.filter_group = group
  group.add_option(
      '-d', '--dimension',
      default=[],
      action='append',
      nargs=2,
      dest='dimensions',
      metavar='FOO bar',
      help='dimension to filter on')
  parser.add_option_group(group)
851
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400852
def process_filter_options(parser, options):
  """Converts options.dimensions into a dict and requires it to be non-empty.

  Calls parser.error() when no dimension was specified.
  """
  dimension_map = dict(options.dimensions)
  options.dimensions = dimension_map
  if not dimension_map:
    parser.error('Please at least specify one --dimension')
857
858
def add_sharding_options(parser):
  """Adds the 'Sharding options' group, i.e. --shards, to |parser|.

  The group is also stored as parser.sharding_group.
  """
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  parser.sharding_group = group
  group.add_option(
      '--shards',
      type='int',
      default=1,
      help='Number of shards to trigger and collect.')
  parser.add_option_group(group)
865
866
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  This covers the isolate server options, the filtering options, the
  'Task properties' group (stored as parser.task_group) and the --profile
  logging option.
  """
  isolateserver.add_isolate_server_options(parser, True)
  add_filter_options(parser)

  task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group = task_group
  task_group.add_option(
      '-e', '--env',
      default=[],
      action='append',
      nargs=2,
      metavar='FOO bar',
      help='Environment variables to set')
  task_group.add_option(
      '--priority',
      type='int',
      default=100,
      help='The lower value, the more important the task is')
  task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. It uniquely identifies the task. '
           'Defaults to <base_name>/<dimensions>/<isolated hash>/<timestamp> '
           'if an isolated file is provided, if a hash is provided, it '
           'defaults to <user>/<dimensions>/<isolated hash>/<timestamp>')
  task_group.add_option(
      '--deadline',
      type='int',
      default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  parser.add_option_group(task_group)

  # TODO(maruel): This is currently written in a chromium-specific way.
  parser.group_logging.add_option(
      '--profile',
      action='store_true',
      default=bool(os.environ.get('ISOLATE_DEBUG')),
      help='Have run_isolated.py print profiling info')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000895
896
def process_trigger_options(parser, options, args):
  """Validates the options and arguments of commands that trigger a task.

  Processes the isolate server options first, then requires exactly one
  positional argument and finally processes the filtering options.
  """
  isolateserver.process_isolate_server_options(parser, options)
  positional_count = len(args)
  if positional_count != 1:
    parser.error('Must pass one .isolated file or its hash (sha1).')
  process_filter_options(parser, options)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000902
903
def add_collect_options(parser):
  """Adds the options used to collect task results to |parser|.

  Adds --timeout to the server group, --decorate and --print-status-updates
  to the logging group, and a new 'Task output' group (stored as
  parser.task_output_group) holding --task-output-dir.
  """
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=DEFAULT_SHARD_WAIT_TIME,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')

  logging_group = parser.group_logging
  logging_group.add_option(
      '--decorate', action='store_true', help='Decorate output')
  logging_group.add_option(
      '--print-status-updates',
      action='store_true',
      help='Print periodic status updates')

  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group = output_group
  output_group.add_option(
      '--task-output-dir',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains <task-output-dir>/summary.json file with '
           'a summary of task results across all shards, and per-shard '
           'directory with output files produced by a shard: '
           '<task-output-dir>/<zero-based-shard-index>/')
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000925
926
def extract_isolated_command_extra_args(args):
  """Splits |args| at the first '--' separator.

  Returns:
    tuple(arguments before the separator, arguments after the separator). The
    separator itself is dropped. When there is no separator, |args| is
    returned as is along with an empty list.
  """
  if '--' not in args:
    return (args, [])
  separator = args.index('--')
  before = args[:separator]
  after = args[separator+1:]
  return (before, after)
933
934
@subcommand.usage('task_name')
def CMDcollect(parser, args):
  """Retrieves results of a Swarming task.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)

  # Exactly one positional argument, the task name, is accepted.
  name_count = len(args)
  if name_count == 0:
    parser.error('Must specify one task name.')
  elif name_count > 1:
    parser.error('Must specify only one task name.')

  try:
    exit_code = collect(
        options.swarming,
        args[0],
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000962
963
def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  filter_group = parser.filter_group
  filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  service = net.get_http_service(options.swarming)
  data = service.json_request('GET', '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  def bot_has_dimension(bot_dimensions, key, value):
    """Returns True if the bot exposes |value| for the dimension |key|."""
    if key not in bot_dimensions:
      return False
    actual = bot_dimensions[key]
    # A bot can have multiple value for a key, for example,
    # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
    # be accepted.
    if isinstance(actual, list):
      return value in actual
    return value == actual

  # A bot is considered dead once it has not been seen for longer than the
  # timeout reported by the server.
  death_timeout = datetime.timedelta(seconds=data['machine_death_timeout'])
  utcnow = datetime.datetime.utcnow()
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['id']):
    last_seen = datetime.datetime.strptime(
        machine['last_seen'], '%Y-%m-%d %H:%M:%S')
    is_dead = utcnow - last_seen > death_timeout
    if options.dead_only:
      skipped = not is_dead
    else:
      skipped = is_dead and not options.keep_dead
    if skipped:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = machine['dimensions']
    matches = all(
        bot_has_dimension(dimensions, key, value)
        for key, value in options.dimensions)
    if not matches:
      continue

    print(machine['id'])
    if not options.bare:
      print(' %s' % dimensions)
  return 0
1017
1018
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  # Everything after '--' is forwarded to the isolated command, it must not be
  # seen by the option parser.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  # First step: trigger the task.
  try:
    tasks, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
  except Failure as e:
    message = 'Failed to trigger %s(%s): %s' % (
        options.task_name, args[0], e.args[0])
    on_error.report(message)
    return 1

  if not tasks:
    on_error.report('Failed to trigger the task.')
    return 1
  # Only print the task name when it was generated instead of user provided.
  if task_name != options.task_name:
    print('Triggered task: %s' % task_name)

  # Second step: wait for the results.
  try:
    # TODO(maruel): Use task_ids, it's much more efficient!
    exit_code = collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +00001070
1071
@subcommand.usage("(hash|isolated) [-- extra_args]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive, packages it if needed and sends a
  Swarming manifest file to the Swarming server.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  try:
    tasks, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
    if not tasks:
      return 1
    # Only print the task name when it was generated instead of user provided.
    if task_name != options.task_name:
      print('Triggered task: %s' % task_name)
    if options.dump_json:
      details = {
        'base_task_name': task_name,
        'tasks': tasks,
      }
      tools.write_json(options.dump_json, details, True)
    return 0
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001123
1124
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser shared by the commands, adds --swarming and auth options."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group = server_group
    # The default server is read from the SWARMING_SERVER environment variable.
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL',
        default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses the arguments, then normalizes and requires --swarming."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    # Trailing slashes are dropped before the emptiness check, so a value
    # made only of slashes is rejected too.
    server = options.swarming.rstrip('/')
    options.swarming = server
    if not server:
      self.error('--swarming is required.')
    auth.process_auth_options(self, options)
    return options, args
1145
1146
def main(args):
  """Dispatches |args| to the matching command and returns its result."""
  dispatcher = subcommand.CommandDispatcher(__name__)
  parser = OptionParserSwarming(version=__version__)
  return dispatcher.execute(parser, args)
maruel@chromium.org0437a732013-08-27 16:05:52 +00001150
1151
if __name__ == '__main__':
  # Process-wide setup, done once before dispatching to a command.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  # The value returned by main() becomes the process exit code.
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
1156 sys.exit(main(sys.argv[1:]))