blob: c35c56528c069286a84d64344eafa4494020abc1 [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Vadim Shtayuraf27448e2014-06-26 11:35:05 -07008__version__ = '0.4.10'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040010import datetime
Marc-Antoine Ruel5b475782014-02-14 20:57:59 -050011import getpass
maruel@chromium.org0437a732013-08-27 16:05:52 +000012import hashlib
13import json
14import logging
15import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070016import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import shutil
maruel@chromium.org0437a732013-08-27 16:05:52 +000018import subprocess
19import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070020import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000021import time
22import urllib
maruel@chromium.org0437a732013-08-27 16:05:52 +000023
24from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000027
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050028from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040029from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
maruel@chromium.org0437a732013-08-27 16:05:52 +000031from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000032from utils import tools
33from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
maruel@chromium.org7b844a62013-09-17 13:04:59 +000036import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000037import run_isolated
38
39
# Directory that contains this script. Used below to locate isolate.py and as
# the root of the zip bundle sent to the bot.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# 'tools' directory next to this script; the bot cleanup script is read from
# here.
TOOLS_PATH = os.path.join(ROOT_DIR, 'tools')


# The default time to wait for a shard to finish running.
# NOTE(review): presumably expressed in seconds (80 minutes) - confirm.
DEFAULT_SHARD_WAIT_TIME = 80 * 60.

# How often to print status updates to stdout in 'collect'. Used as the timeout
# when waiting on the results channel.
# NOTE(review): presumably expressed in seconds (15 minutes) - confirm.
STATUS_UPDATE_INTERVAL = 15 * 60.


# Text used when a task produced no output at all.
NO_OUTPUT_FOUND = (
  'No output produced by the task, it may have failed to run.\n'
  '\n')
54
55
class Failure(Exception):
  """Generic error raised by this tool when an operation can not complete."""
59
60
class Manifest(object):
  """Describes a single Swarming task request and serializes it to JSON."""

  def __init__(
      self, isolate_server, namespace, isolated_hash, task_name, extra_args,
      env, dimensions, deadline, verbose, profile,
      priority):
    """Records the task parameters; tasks and bundled files start out empty.

    Args:
      isolate_server - isolate server url.
      namespace - isolate server namespace to use.
      isolated_hash - the manifest's sha-1 that the slave is going to fetch.
      task_name - the name to give the task request.
      extra_args - additional arguments to pass to isolated command; may be
          None or empty.
      env - dict of environment variables to set; copied.
      dimensions - dict of dimensions to filter the task on; copied.
      deadline - maximum pending time before this task expires.
      verbose - if True, have the slave print more details.
      profile - if True, have the slave print more timing data.
      priority - int between 0 and 1000, lower the higher priority.
    """
    # Where the isolated task lives.
    self.isolate_server = isolate_server
    self.namespace = namespace
    self.isolated_hash = isolated_hash
    self.task_name = task_name

    # Snapshot the mutable inputs so that later changes made by the caller do
    # not leak into this manifest.
    self.extra_args = tuple(extra_args) if extra_args else ()
    self.env = env.copy()
    self.dimensions = dimensions.copy()

    # Scheduling and logging knobs.
    self.deadline = deadline
    self.verbose = bool(verbose)
    self.profile = bool(profile)
    self.priority = priority

    # Filled in by add_task() and add_bundled_file().
    self._tasks = []
    self._files = []

  def add_task(self, task_name, actions, time_out=2*60*60):
    """Appends a new task as a TestObject to the swarming manifest file.

    Tasks cannot be added once the manifest was uploaded.

    By default, command will be killed after 2 hours of execution.

    See TestObject in services/swarming/src/common/test_request_message.py for
    the valid format.

    Args:
      task_name - name of the task, stored as 'test_name'.
      actions - the command to run, stored as 'action'.
      time_out - hard time out for the command, stored as 'hard_time_out'.
    """
    test_object = {
      'action': actions,
      'decorate_output': self.verbose,
      'test_name': task_name,
      'hard_time_out': time_out,
    }
    self._tasks.append(test_object)

  def add_bundled_file(self, file_name, file_url):
    """Registers a file to ship along with the task.

    File will be downloaded and extracted by the swarm bot before launching the
    task. Entries are stored as [url, name] pairs.
    """
    entry = [file_url, file_name]
    self._files.append(entry)

  def to_json(self):
    """Exports the current configuration into a swarm-readable manifest file.

    The actual serialization format is defined as a TestCase object as described
    in services/swarming/src/common/test_request_message.py

    Returns:
      Compact JSON string with sorted keys.
    """
    # Is a TestConfiguration.
    configuration = {
      'config_name': 'isolated',
      'deadline_to_run': self.deadline,
      'dimensions': self.dimensions,
      'priority': self.priority,
    }
    request = {
      'cleanup': 'root',
      'configurations': [configuration],
      'data': self._files,
      'env_vars': self.env,
      'test_case_name': self.task_name,
      'tests': self._tasks,
    }
    return json.dumps(request, sort_keys=True, separators=(',',':'))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000145
Marc-Antoine Ruelcd629732013-12-20 15:00:42 -0500146
class TaskOutputCollector(object):
  """Gathers per-shard results and downloads task output files to disk.

  A single instance is shared among multiple threads running the
  'retrieve_results' function; in particular they call 'process_shard_result'
  in parallel, so shared state is guarded by a lock.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # Guards |_per_shard_results| and |_storage|.
    self._lock = threading.Lock()
    # Shard index -> result dict, for shards seen so far.
    self._per_shard_results = {}
    # Created on first use by _get_storage().
    self._storage = None

    if not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Records the result of one shard and fetches its output files, if any.

    Called concurrently from multiple threads. Results with an out of range
    index, or for a shard that was already recorded, are logged and dropped.

    Args:
      shard_index: integer index of the shard that produced |result|.
      result: result dict of the shard; its 'output' value is scanned for the
          location of output files.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if not 0 <= shard_index < self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary. This happens outside of the lock.
    location = extract_output_files_location(result['output'])
    if not location:
      return
    isolate_server, namespace, isolated_hash = location
    storage = self._get_storage(isolate_server, namespace)
    if not storage:
      return

    # Output files are supposed to be small and they are not reused across
    # tasks. So use MemoryCache for them instead of on-disk cache. Make
    # files writable, so that calling script can delete them.
    isolateserver.fetch_isolated(
        isolated_hash,
        storage,
        isolateserver.MemoryCache(file_mode_mask=0o700),
        os.path.join(self.task_output_dir, str(shard_index)),
        False)

  def finalize(self):
    """Writes summary.json, shutdowns underlying Storage."""
    with self._lock:
      # One entry per expected shard, None for shards that never reported.
      shard_results = [
        self._per_shard_results.get(i) for i in xrange(self.shard_count)
      ]
      summary = {
        'task_name': self.task_name,
        'shards': shard_results,
      }
      summary_path = os.path.join(self.task_output_dir, 'summary.json')
      tools.write_json(summary_path, summary, False)
      if self._storage:
        self._storage.close()
        self._storage = None

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    The storage is created on the first call. Later calls must name the same
    isolate server and namespace, otherwise an error is logged and None is
    returned.
    """
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
        return self._storage

      # Shards must all use exact same isolate server and namespace.
      if self._storage.location != isolate_server:
        logging.error(
            'Task shards are using multiple isolate servers: %s and %s',
            self._storage.location, isolate_server)
        return None
      if self._storage.namespace != namespace:
        logging.error(
            'Task shards are using multiple namespaces: %s and %s',
            self._storage.namespace, namespace)
        return None
      return self._storage
245
246
def now():
  """Returns time.time().

  Kept as a separate function so it can be mocked easily.
  """
  current = time.time()
  return current
250
251
def get_task_keys(swarm_base_url, task_name):
  """Returns the Swarming task key for each shards of task_name.

  Queries the server repeatedly (see net.retry_loop) until it replies with
  something other than a 'No matching' message.

  Raises:
    Failure if the request fails or if no matching task shows up before the
    retry loop is exhausted.
  """
  query = urllib.urlencode([('name', task_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, query)

  def not_found():
    # Built on demand so the message is only formatted when actually needed.
    return Failure(
        'Error: Unable to find any task with the name, %s, on swarming server'
        % task_name)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    response = net.url_read(url, retry_404=True)
    if response is None:
      raise not_found()

    # TODO(maruel): Compare exact string.
    if 'No matching' not in response:
      return json.loads(response)

    logging.warning('Unable to find any task with the name, %s, on swarming '
                    'server' % task_name)

  raise not_found()
maruel@chromium.org0437a732013-08-27 16:05:52 +0000274
275
def extract_output_files_location(task_log):
  """Finds where the output files of a task are stored, based on its log.

  The log is searched for a JSON dict wrapped in [run_isolated_out_hack] tags,
  which must hold string values under 'hash', 'namespace' and 'storage'.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not found:
    return None
  payload = found.group(1)

  def as_ascii(value):
    # Only strings are acceptable. Encoding errors are ValueError subclasses,
    # so they end up in the handler below as well.
    if isinstance(value, basestring):
      return value.encode('ascii')
    raise ValueError()

  try:
    data = json.loads(payload)
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash = as_ascii(data['hash'])
    namespace = as_ascii(data['namespace'])
    isolate_server = as_ascii(data['storage'])
    if not file_path.is_url(isolate_server):
      raise ValueError()
  except (KeyError, ValueError):
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', payload)
    return None
  return (isolate_server, namespace, isolated_hash)
312
313
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Polls the server until the task identified by task_key has a result.

  Args:
    base_url: URL of the swarming server.
    shard_index: index of the shard, handed to |output_collector|.
    task_key: key of the task to get the result of.
    timeout: float, how long to keep polling; a zero value disables the
        deadline.
    should_stop: threading.Event-like object; polling stops once it is set.
    output_collector: optional object whose process_shard_result() is called
        with the result before it is returned.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  query = urllib.urlencode([('r', task_key)])
  result_url = '%s/get_result?%s' % (base_url, query)
  started = now()
  deadline = (started + timeout) if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      delay = min(15, 1 + (current_time - started) / 30.0)
      if deadline:
        # Never sleep past the deadline.
        delay = min(delay, deadline - current_time)
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read, since we are doing retries
    # ourselves. Do not use retry_404 so should_stop is polled more often.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)

    # Request failed. Try again.
    if response is None:
      continue

    # Got some response, ensure it is JSON dict, retry if not.
    try:
      result = json.loads(response) or {}
      if not isinstance(result, dict):
        raise ValueError()
    except (ValueError, TypeError):
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, response)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if not result.get('output'):
      continue

    # Record the result, try to fetch attached output files (if any).
    if output_collector:
      # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
      output_collector.process_shard_result(shard_index, result)
    return result

  # Asked to stop before any result was obtained.
  return None
maruel@chromium.org0437a732013-08-27 16:05:52 +0000378
379
def yield_results(
    swarm_base_url, task_keys, timeout, max_threads,
    print_status_updates, output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Args:
    swarm_base_url: URL of the swarming server, passed to retrieve_results.
    task_keys: sequence of task keys; the position of a key in the sequence is
        used as its shard index.
    timeout: passed as is to retrieve_results for each shard.
    max_threads: upper bound on the thread pool size; a false value means one
        thread per task key.
    print_status_updates: if true, prints the list of shards still pending
        each time STATUS_UPDATE_INTERVAL elapses without a new result.
    output_collector: passed as is to retrieve_results for each shard.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  number_threads = (
      min(max_threads, len(task_keys)) if max_threads else len(task_keys))
  # Set in the 'finally' clause below to make pending retrieve_results calls
  # return early.
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      # A dedicated function is used so that each lambda captures its own
      # shard_index.
      def enqueue_retrieve_results(shard_index, task_key):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn),
            swarm_base_url, shard_index, task_key, timeout,
            should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_key in enumerate(task_keys):
        enqueue_retrieve_results(shard_index, task_key)

      # Wait for all of them to finish.
      # shards_remaining has to be a list since .remove() is called on it
      # below.
      shards_remaining = range(len(task_keys))
      active_task_count = len(task_keys)
      while active_task_count:
        shard_index, result = None, None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # Nothing finished during the interval; no task is accounted for, so
          # keep waiting.
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          # Falls through with result == None, so the call is counted as
          # finished and reported as failed below.
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
452
453
def setup_run_isolated(manifest, bundle):
  """Makes |manifest| run the isolated task through run_isolated.py.

  Both arguments are modified in place: files are added to |bundle| and two
  tasks ('Run Test' and 'Clean Up') are added to |manifest|.

  Args:
    manifest: Manifest with swarm task definition.
    bundle: ZipPackage with files that would be transfered to swarm bot.
        If None, only |manifest| is modified (useful in tests).
  """
  run_test_name = 'run_isolated.zip'
  cleanup_script_name = 'swarm_cleanup.py'

  # Add uncompressed zip here. It'll be compressed as part of the package sent
  # to Swarming server.
  if bundle and run_test_name not in bundle.files:
    run_isolated_zip = run_isolated.get_as_zip_package()
    bundle.add_buffer(
        run_test_name, run_isolated_zip.zip_into_buffer(compress=False))

  if bundle and cleanup_script_name not in bundle.files:
    cleanup_script_path = os.path.join(TOOLS_PATH, cleanup_script_name)
    bundle.add_file(cleanup_script_path, cleanup_script_name)

  # A URL means a real isolate server, anything else is a local directory.
  if file_path.is_url(manifest.isolate_server):
    server_flag = '--isolate-server'
  else:
    server_flag = '--indir'

  run_cmd = [
    'python', run_test_name,
    '--hash', manifest.isolated_hash,
    '--namespace', manifest.namespace,
    server_flag, manifest.isolate_server,
  ]

  if manifest.verbose or manifest.profile:
    # Have it print the profiling section.
    run_cmd.append('--verbose')

  # Pass all extra args for run_isolated.py, it will pass them to the command.
  if manifest.extra_args:
    run_cmd += ['--'] + list(manifest.extra_args)

  manifest.add_task('Run Test', run_cmd)

  # Clean up
  manifest.add_task('Clean Up', ['python', cleanup_script_name])
501
502
def setup_googletest(env, shards, index):
  """Returns |env| with googletest sharding variables set when sharding.

  With a single shard (or less) |env| itself is returned untouched. Otherwise
  a copy is returned with GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS added;
  |env| is never modified.
  """
  if shards <= 1:
    return env
  sharded_env = env.copy()
  sharded_env['GTEST_SHARD_INDEX'] = str(index)
  sharded_env['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded_env
510
511
def archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the CAC.

  Runs isolate.py (located next to this script) in a subprocess, using the
  'archive' command when |isolate_server| is a URL and the 'hashtable' command
  with a local output directory otherwise.

  Args:
    isolate_server: isolate server URL, or a local directory path.
    namespace: isolate server namespace to use.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm handed to isolateserver.hash_file.
    verbose: verbosity level; '--verbose' is passed to isolate.py that many
        times.

  Returns:
    Hash of the .isolated file on success.
    None if isolate.py exited with a non-zero code.
  """
  logging.info('archive(%s, %s, %s)', isolate_server, namespace, isolated)
  if file_path.is_url(isolate_server):
    command = 'archive'
    flag = '--isolate-server'
  else:
    command = 'hashtable'
    flag = '--outdir'

  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    command,
    flag, isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Do not pass |verbose| positionally here: the second positional argument of
  # subprocess.call() is Popen's bufsize, not a verbosity flag.
  if subprocess.call(cmd):
    return None
  return isolateserver.hash_file(isolated, algo)
541
542
def get_shard_task_name(task_name, shards, index):
  """Returns the name of one shard of a task.

  A task with exactly one shard keeps its name as is; otherwise the shard
  count and the shard index are appended, separated by colons.
  """
  if shards != 1:
    return '%s:%s:%s' % (task_name, shards, index)
  return task_name
548
549
def upload_zip_bundle(isolate_server, bundle):
  """Zips |bundle|, pushes it to isolate storage and returns its fetch URL.

  Args:
    isolate_server: URL of an isolate server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from on success.
    None on failure.
  """
  # Swarming bot would need to be able to grab the file from the storage
  # using raw HTTP GET. Use 'default' namespace so that the raw data returned
  # to a bot is not zipped, since swarm_bot doesn't understand compressed
  # data yet. This namespace have nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  try:
    started = now()
    item = isolateserver.BufferItem(
        bundle.zip_into_buffer(), high_priority=True)
    with isolateserver.get_storage(isolate_server, 'default') as storage:
      pushed = storage.upload_items([item])
      fetch_url = storage.get_fetch_url(item)
    elapsed = now() - started
  except (IOError, OSError) as exc:
    tools.report_error('Failed to upload the zip file: %s' % exc)
    return None

  # The item is reported back only when it actually had to be uploaded.
  if item in pushed:
    status = 'Upload complete, time elapsed: %f'
  else:
    status = 'Zip file already on server, time elapsed: %f'
  logging.info(status, elapsed)
  return fetch_url
583
584
def trigger_by_manifest(swarming, manifest):
  """Given a task manifest, triggers it for execution on swarming.

  Posts the serialized manifest to the '/test' endpoint of the server and
  parses its JSON reply.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    tuple(Task id, priority) on success. tuple(None, None) on failure.
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  result = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not result:
    tools.report_error('Failed to trigger task %s' % manifest.task_name)
    # Always return a pair so that callers can unpack the result.
    return None, None
  try:
    data = json.loads(result)
  except (ValueError, TypeError) as e:
    msg = '\n'.join((
        'Failed to trigger task %s' % manifest.task_name,
        'Manifest: %s' % manifest_text,
        'Bad response: %s' % result,
        str(e)))
    tools.report_error(msg)
    return None, None
  if not data:
    return None, None
  # Only the key of the first entry of 'test_keys' is reported.
  return data['test_keys'][0]['test_key'], data['priority']
Vadim Shtayurab450c602014-05-12 19:23:25 -0700614
615
def abort_task(_swarming, _manifest):
  """Placeholder for aborting a task that was triggered from a manifest.

  Does nothing for now; both arguments are ignored.
  """
  # TODO(vadimsh): Not supported by the server yet.
  return None
619
620
def trigger_task_shards(
    swarming, isolate_server, namespace, isolated_hash, task_name, extra_args,
    shards, dimensions, env, deadline, verbose, profile, priority):
  """Triggers multiple subtasks of a sharded task.

  Builds one Manifest per shard, uploads the shared zip bundle once, attaches
  its URL to every manifest and then triggers the manifests one by one. If any
  shard fails to trigger, the shards already triggered are passed to
  abort_task() and the whole operation is reported as a failure.

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    Keys are shard task names, values are dicts with 'shard_index', 'task_id'
    and 'view_url'.
    None in case of failure.
  """
  # Collects all files that are necessary to bootstrap a task execution
  # on the bot. Usually it includes self contained run_isolated.zip and
  # a bunch of small other scripts. All heavy files are pulled
  # by run_isolated.zip. Updated in 'setup_run_isolated'.
  bundle = zip_package.ZipPackage(ROOT_DIR)

  # Make a separate Manifest for each shard, put shard index and number of
  # shards into env and subtask name.
  manifests = []
  for index in xrange(shards):
    manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=get_shard_task_name(task_name, shards, index),
        extra_args=extra_args,
        dimensions=dimensions,
        env=setup_googletest(env, shards, index),
        deadline=deadline,
        verbose=verbose,
        profile=profile,
        priority=priority)
    setup_run_isolated(manifest, bundle)
    manifests.append(manifest)

  # Upload zip bundle file to get its URL.
  bundle_url = upload_zip_bundle(isolate_server, bundle)
  if not bundle_url:
    # Must be a plain None: callers test the result with 'if not tasks', and a
    # (None, None) tuple is truthy, which would make the failure look like a
    # success.
    return None

  # Attach that file to all manifests.
  for manifest in manifests:
    manifest.add_bundled_file('swarm_data.zip', bundle_url)

  # Trigger all the subtasks.
  tasks = {}
  priority_warning = False
  for index, manifest in enumerate(manifests):
    # trigger_by_manifest() returns a bare None instead of a pair on one of its
    # failure paths; normalize it so the unpacking below cannot blow up.
    triggered = trigger_by_manifest(swarming, manifest)
    task_id, actual_priority = triggered or (None, None)
    if not task_id:
      break
    # Warn only once if the server overrode the requested priority.
    if not priority_warning and actual_priority != manifest.priority:
      priority_warning = True
      print >> sys.stderr, 'Priority was reset to %s' % actual_priority
    tasks[manifest.task_name] = {
      'shard_index': index,
      'task_id': task_id,
      'view_url': '%s/user/task/%s' % (swarming, task_id),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(manifests):
    if tasks:
      print >> sys.stderr, 'Not all shards were triggered'
      for task_dict in tasks.itervalues():
        abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
maruel@chromium.org0437a732013-08-27 16:05:52 +0000689
690
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Resolves |arg| to the hash of an isolated file, archiving it if needed.

  |arg| is either a path ending with '.isolated', which is archived first, or
  an already computed hash, which is only validated against |algo|.

  Returns:
    tuple(file hash, is_file). The hash is None when archival failed or when
    |arg| is not a valid hash; is_file is True when |arg| was treated as a
    file and False when it was treated as a hash.
  """
  if arg.endswith('.isolated'):
    digest = archive(isolate_server, namespace, arg, algo, verbose)
    if digest:
      return digest, True
    tools.report_error('Archival failure %s' % arg)
    return None, True

  if isolateserver.is_valid_hash(arg, algo):
    return arg, False

  tools.report_error('Invalid hash %s' % arg)
  return None, False
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500708
709
def trigger(
    swarming,
    isolate_server,
    namespace,
    file_hash_or_isolated,
    task_name,
    extra_args,
    shards,
    dimensions,
    env,
    deadline,
    verbose,
    profile,
    priority):
  """Sends off the hash swarming task requests.

  Resolves |file_hash_or_isolated| to a hash with isolated_to_hash() (sha1),
  generates a task name when |task_name| is empty and then triggers all the
  shards through trigger_task_shards().

  Returns:
    tuple(dict(task_name: task details), base task name). The dict of tasks is
    None in case of failure; when the hash could not be resolved the base task
    name is an empty string.
  """
  file_hash, is_file = isolated_to_hash(
      isolate_server, namespace, file_hash_or_isolated, hashlib.sha1, verbose)
  if not file_hash:
    # The first item must be falsy: callers only check 'if not tasks' to detect
    # a failure, so returning a non-zero integer here would be read as success.
    return None, ''
  if not task_name:
    # If a file name was passed, use its base name of the isolated hash.
    # Otherwise, use user name as an approximation of a task name.
    if is_file:
      key = os.path.splitext(os.path.basename(file_hash_or_isolated))[0]
    else:
      key = getpass.getuser()
    task_name = '%s/%s/%s/%d' % (
        key,
        '_'.join('%s=%s' % (k, v) for k, v in sorted(dimensions.iteritems())),
        file_hash,
        now() * 1000)

  tasks = trigger_task_shards(
      swarming=swarming,
      isolate_server=isolate_server,
      namespace=namespace,
      isolated_hash=file_hash,
      task_name=task_name,
      extra_args=extra_args,
      shards=shards,
      dimensions=dimensions,
      deadline=deadline,
      env=env,
      verbose=verbose,
      profile=profile,
      priority=priority)
  return tasks, task_name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000762
763
def decorate_shard_output(shard_index, result, shard_exit_code):
  """Returns wrapped output for swarming task shard.

  Args:
    shard_index: index of the shard, shown in the banner.
    result: dict describing the shard result. 'machine_id' and 'output' are
        required; 'machine_tag' is optional and shown as 'unknown' if missing.
        A falsy 'output' is replaced with NO_OUTPUT_FOUND.
    shard_exit_code: integer exit code of the shard, printed both in decimal
        and as an unsigned 32 bits hexadecimal value.

  Returns:
    The shard output surrounded by 'Begin output' / 'End output' banners.
  """
  # The values must be listed in the same order as the labels in the format
  # string: machine tag first, then machine id.
  tag = 'index %s (machine tag: %s, id: %s)' % (
      shard_index,
      result.get('machine_tag', 'unknown'),
      result['machine_id'])
  return (
    '\n'
    '================================================================\n'
    'Begin output from shard %s\n'
    '================================================================\n'
    '\n'
    '%s'
    '================================================================\n'
    'End output from shard %s.\nExit code %d (%s).\n'
    '================================================================\n') % (
        tag, result['output'] or NO_OUTPUT_FOUND, tag,
        shard_exit_code, hex(0xffffffff & shard_exit_code))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000782
783
def collect(
    url, task_name, shards, timeout, decorate,
    print_status_updates, task_output_dir):
  """Retrieves results of a Swarming task.

  Looks up the task key of every shard, then prints the output of each shard
  as it is yielded by yield_results(). Output files are collected only when
  |task_output_dir| is set.

  Returns:
    0 if every shard reported a result and all exit codes are zero, 1
    otherwise.

  Raises:
    Failure if a shard has no task key or more than one task key.
  """
  # Grab task keys for each shard. Order is important, used to figure out
  # shard index based on the key.
  # TODO(vadimsh): Simplify this once server support is added.
  task_keys = []
  for index in xrange(shards):
    shard_task_name = get_shard_task_name(task_name, shards, index)
    logging.info('Collecting %s', shard_task_name)
    shard_task_keys = get_task_keys(url, shard_task_name)
    if not shard_task_keys:
      raise Failure('No task keys to get results with: %s' % shard_task_name)
    if len(shard_task_keys) != 1:
      raise Failure('Expecting only one shard for a task: %s' % shard_task_name)
    task_keys.append(shard_task_keys[0])

  # Collect output files only if explicitly asked with --task-output-dir option.
  if task_output_dir:
    output_collector = TaskOutputCollector(
        task_output_dir, task_name, len(task_keys))
  else:
    output_collector = None

  seen_shards = set()
  exit_codes = []

  try:
    for index, output in yield_results(
        url, task_keys, timeout, None, print_status_updates, output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. A missing
      # 'exit_codes' value is treated as a failure (exit code 1).
      codes = map(int, (output['exit_codes'] or '1').split(','))
      shard_exit_code = next((code for code in codes if code), 0)
      exit_codes.append(shard_exit_code)

      if decorate:
        print(decorate_shard_output(index, output, shard_exit_code))
      else:
        # Be as tolerant as the decorated path: 'machine_tag' may be absent
        # and 'output' may be None when the shard produced nothing.
        print(
            '%s/%s: %s' % (
                output['machine_id'],
                output.get('machine_tag', 'unknown'),
                output['exit_codes']))
        print(''.join(
            ' %s\n' % l for l in (output['output'] or '').splitlines()))
  finally:
    # Always finalize, even if fetching the results raised.
    if output_collector:
      output_collector.finalize()

  if len(seen_shards) != len(task_keys):
    missing_shards = [x for x in range(len(task_keys)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return int(any(exit_codes))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000845
846
def add_filter_options(parser):
  """Adds the 'Filtering slaves' option group, with --dimension, to |parser|.

  The group is also exposed as parser.filter_group so that callers can add
  more filtering options to it.
  """
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  parser.filter_group = group
  group.add_option(
      '-d', '--dimension',
      default=[],
      action='append',
      nargs=2,
      dest='dimensions',
      metavar='FOO bar',
      help='dimension to filter on')
  parser.add_option_group(group)
854
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400855
def process_filter_options(parser, options):
  """Converts options.dimensions to a dict and requires it to be non-empty.

  Calls parser.error() when no dimension was specified.
  """
  dimensions = dict(options.dimensions)
  options.dimensions = dimensions
  if dimensions:
    return
  parser.error('Please at least specify one --dimension')
860
861
def add_sharding_options(parser):
  """Adds the 'Sharding options' group, with --shards, to |parser|.

  The group is also exposed as parser.sharding_group.
  """
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  parser.sharding_group = group
  group.add_option(
      '--shards',
      type='int',
      default=1,
      help='Number of shards to trigger and collect.')
  parser.add_option_group(group)
868
869
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  This covers the isolate server options, the filtering options, a
  'Task properties' group (exposed as parser.task_group) and the --profile
  flag, which is added to the parser's logging group.
  """
  isolateserver.add_isolate_server_options(parser, True)
  add_filter_options(parser)

  task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group = task_group
  task_group.add_option(
      '-e', '--env',
      default=[],
      action='append',
      nargs=2,
      metavar='FOO bar',
      help='Environment variables to set')
  task_group.add_option(
      '--priority',
      type='int',
      default=100,
      help='The lower value, the more important the task is')
  task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. It uniquely identifies the task. '
           'Defaults to <base_name>/<dimensions>/<isolated hash>/<timestamp> '
           'if an isolated file is provided, if a hash is provided, it '
           'defaults to <user>/<dimensions>/<isolated hash>/<timestamp>')
  task_group.add_option(
      '--deadline',
      type='int',
      default=6 * 60 * 60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  parser.add_option_group(task_group)

  # TODO(maruel): This is currently written in a chromium-specific way.
  # The flag defaults to on when ISOLATE_DEBUG is set in the environment.
  parser.group_logging.add_option(
      '--profile',
      action='store_true',
      default=bool(os.environ.get('ISOLATE_DEBUG')),
      help='Have run_isolated.py print profiling info')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000898
899
def process_trigger_options(parser, options, args):
  """Validates the options and arguments of a command that triggers a task.

  Processes the isolate server options, requires exactly one positional
  argument and then processes the filtering options.
  """
  isolateserver.process_isolate_server_options(parser, options)
  arg_count = len(args)
  if arg_count != 1:
    parser.error('Must pass one .isolated file or its hash (sha1).')
  process_filter_options(parser, options)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000905
906
def add_collect_options(parser):
  """Adds all options to collect the results of a task.

  --timeout goes into the server group, --decorate and --print-status-updates
  into the logging group, and --task-output-dir into a new 'Task output'
  group exposed as parser.task_output_group.
  """
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=DEFAULT_SHARD_WAIT_TIME,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')

  logging_group = parser.group_logging
  logging_group.add_option(
      '--decorate', action='store_true', help='Decorate output')
  logging_group.add_option(
      '--print-status-updates',
      action='store_true',
      help='Print periodic status updates')

  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group = output_group
  output_group.add_option(
      '--task-output-dir',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains <task-output-dir>/summary.json file with '
           'a summary of task results across all shards, and per-shard '
           'directory with output files produced by a shard: '
           '<task-output-dir>/<zero-based-shard-index>/')
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000928
929
def extract_isolated_command_extra_args(args):
  """Splits |args| at the first '--' separator.

  Returns:
    tuple(arguments before '--', arguments after '--'). When there is no
    separator, |args| itself is returned with an empty list.
  """
  if '--' not in args:
    return (args, [])
  separator = args.index('--')
  before = args[:separator]
  after = args[separator + 1:]
  return (before, after)
936
937
@subcommand.usage('task_name')
def CMDcollect(parser, args):
  """Retrieves results of a Swarming task.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  # Exactly one positional argument, the task name, is accepted.
  if len(args) > 1:
    parser.error('Must specify only one task name.')
  elif not args:
    parser.error('Must specify one task name.')

  try:
    return collect(
        url=options.swarming,
        task_name=args[0],
        shards=options.shards,
        timeout=options.timeout,
        decorate=options.decorate,
        print_status_updates=options.print_status_updates,
        task_output_dir=options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
  return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +0000965
966
def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  filter_group = parser.filter_group
  filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  def has_requested_dimensions(bot_dimensions):
    """Returns True if the bot has all the dimensions requested by the user."""
    for key, value in options.dimensions:
      if key not in bot_dimensions:
        return False
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      actual = bot_dimensions[key]
      if isinstance(actual, list):
        if value not in actual:
          return False
      elif value != actual:
        return False
    return True

  service = net.get_http_service(options.swarming)
  data = service.json_request('GET', '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # A bot not seen for longer than this delay is considered dead.
  timeout = datetime.timedelta(seconds=data['machine_death_timeout'])
  utcnow = datetime.datetime.utcnow()
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['id']):
    last_seen = datetime.datetime.strptime(
        machine['last_seen'], '%Y-%m-%d %H:%M:%S')
    is_dead = utcnow - last_seen > timeout
    if options.dead_only:
      selected = is_dead
    else:
      selected = options.keep_dead or not is_dead
    if not selected:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = machine['dimensions']
    if not has_requested_dimensions(dimensions):
      continue
    print(machine['id'])
    if not options.bare:
      print(' %s' % dimensions)
  return 0
1020
1021
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  # Everything after '--' is passed through to the isolated command.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  trigger_kwargs = {
    'swarming': options.swarming,
    'isolate_server': options.isolate_server or options.indir,
    'namespace': options.namespace,
    'file_hash_or_isolated': args[0],
    'task_name': options.task_name,
    'extra_args': isolated_cmd_args,
    'shards': options.shards,
    'dimensions': options.dimensions,
    'env': dict(options.env),
    'deadline': options.deadline,
    'verbose': options.verbose,
    'profile': options.profile,
    'priority': options.priority,
  }
  try:
    tasks, task_name = trigger(**trigger_kwargs)
  except Failure as e:
    message = 'Failed to trigger %s(%s): %s' % (
        options.task_name, args[0], e.args[0])
    tools.report_error(message)
    return 1

  if not tasks:
    tools.report_error('Failed to trigger the task.')
    return 1

  # Tell the user which name was used when it differs from the requested one.
  if task_name != options.task_name:
    print('Triggered task: %s' % task_name)

  try:
    # TODO(maruel): Use task_ids, it's much more efficient!
    return collect(
        url=options.swarming,
        task_name=task_name,
        shards=options.shards,
        timeout=options.timeout,
        decorate=options.decorate,
        print_status_updates=options.print_status_updates,
        task_output_dir=options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
  return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001073
1074
@subcommand.usage("(hash|isolated) [-- extra_args]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive, packages it if needed and sends a
  Swarming manifest file to the Swarming server.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  try:
    tasks, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
    if not tasks:
      # Nothing was triggered; report the failure through the exit code.
      return 1
    if task_name != options.task_name:
      print('Triggered task: %s' % task_name)
    if options.dump_json:
      summary = {'base_task_name': task_name, 'tasks': tasks}
      tools.write_json(options.dump_json, summary, True)
    return 0
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001126
1127
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser shared by all commands: adds --swarming and auth options."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group = server_group
    # The server URL defaults to the SWARMING_SERVER environment variable.
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL',
        default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses the arguments, then validates --swarming and the auth options.

    Trailing slashes are stripped from the server URL, which must not be
    empty.
    """
    options, remaining = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    server = options.swarming.rstrip('/')
    options.swarming = server
    if not server:
      self.error('--swarming is required.')
    auth.process_auth_options(self, options)
    return options, remaining
1148
1149
def main(args):
  """Dispatches |args| to the matching CMD* function of this module.

  Returns:
    The value returned by the command, or 1 if an exception was raised, in
    which case the exception is reported with tools.report_error().
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    parser = OptionParserSwarming(version=__version__)
    return dispatcher.execute(parser, args)
  except Exception as e:
    tools.report_error(e)
  return 1
1157
1158
# Script entry point: prepare the process environment (encoding, output
# buffering, colorama), then exit with the code returned by main().
if __name__ == '__main__':
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))