blob: 8279816c30f3223ad1349e09d9b157fd64ad6791 [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Vadim Shtayurab450c602014-05-12 19:23:25 -07008__version__ = '0.4.8'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040010import datetime
Marc-Antoine Ruel5b475782014-02-14 20:57:59 -050011import getpass
maruel@chromium.org0437a732013-08-27 16:05:52 +000012import hashlib
13import json
14import logging
15import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070016import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import shutil
maruel@chromium.org0437a732013-08-27 16:05:52 +000018import subprocess
19import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070020import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000021import time
22import urllib
maruel@chromium.org0437a732013-08-27 16:05:52 +000023
24from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000027
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050028from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040029from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
maruel@chromium.org0437a732013-08-27 16:05:52 +000031from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000032from utils import tools
33from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
maruel@chromium.org7b844a62013-09-17 13:04:59 +000036import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000037import run_isolated
38
39
# Absolute path of the directory that contains this script.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Directory with auxiliary scripts (e.g. swarm_cleanup.py) that get bundled
# and shipped along with a task.
TOOLS_PATH = os.path.join(ROOT_DIR, 'tools')


# The default time to wait for a shard to finish running.
# NOTE(review): presumably expressed in seconds (i.e. 80 minutes) - confirm
# against the code that consumes it.
DEFAULT_SHARD_WAIT_TIME = 80 * 60.

# How often to print status updates to stdout in 'collect'. Passed as the
# timeout when waiting on the results channel in yield_results().
# NOTE(review): presumably expressed in seconds (i.e. 15 minutes) - confirm.
STATUS_UPDATE_INTERVAL = 15 * 60.


# Placeholder text describing a task that did not produce any output.
NO_OUTPUT_FOUND = (
    'No output produced by the task, it may have failed to run.\n'
    '\n')
54
55
class Failure(Exception):
  """Generic error raised by this tool when an operation cannot complete."""
59
60
class Manifest(object):
  """Describes a single Swarming task request and serializes it to JSON."""

  def __init__(
      self, isolate_server, namespace, isolated_hash, task_name, extra_args,
      env, dimensions, deadline, verbose, profile,
      priority):
    """Creates a manifest without any task or bundled file attached yet.

    Args:
      isolate_server - isolate server url.
      namespace - isolate server namespace to use.
      isolated_hash - hash of the .isolated file the bot is going to fetch.
      task_name - the name to give the task request.
      extra_args - additional arguments to pass to the isolated command; may
                   be None or empty.
      env - dict of environment variables to set; a copy is kept.
      dimensions - dict of dimensions to filter the task on; a copy is kept.
      deadline - maximum pending time before this task expires.
      verbose - if True, have the bot print more details.
      profile - if True, have the bot print more timing data.
      priority - int between 0 and 1000, lower the higher priority.
    """
    # Accumulators filled in by add_task() and add_bundled_file().
    self._tasks = []
    self._files = []

    # Where and what to fetch.
    self.isolate_server = isolate_server
    self.namespace = namespace
    self.isolated_hash = isolated_hash

    # How to run it. Containers are copied so later changes made by the caller
    # do not leak into this manifest.
    self.task_name = task_name
    self.extra_args = tuple(extra_args) if extra_args else ()
    self.env = env.copy()
    self.dimensions = dimensions.copy()

    # Scheduling and diagnostics knobs.
    self.deadline = deadline
    self.priority = priority
    self.verbose = bool(verbose)
    self.profile = bool(profile)

  def add_task(self, task_name, actions, time_out=2*60*60):
    """Registers one more command to run as part of this request.

    Each entry is stored in the TestObject format, see
    services/swarming/src/common/test_request_message.py for the valid format.

    Tasks cannot be added once the manifest was uploaded.

    Args:
      task_name - name of the command, stored as 'test_name'.
      actions - the command line to execute, stored as 'action'.
      time_out - hard time out for the command, 2 hours by default.
    """
    entry = {
      'test_name': task_name,
      'action': actions,
      'hard_time_out': time_out,
      'decorate_output': self.verbose,
    }
    self._tasks.append(entry)

  def add_bundled_file(self, file_name, file_url):
    """Attaches a file to the manifest.

    The swarm bot downloads and extracts the file before launching the task.
    The pair is stored as [url, name].
    """
    pair = [file_url, file_name]
    self._files.append(pair)

  def to_json(self):
    """Serializes the manifest as a compact JSON string with sorted keys.

    The serialization format is a TestCase object as described in
    services/swarming/src/common/test_request_message.py
    """
    # Is a TestConfiguration.
    configuration = {
      'config_name': 'isolated',
      'deadline_to_run': self.deadline,
      'dimensions': self.dimensions,
      'priority': self.priority,
    }
    request = {
      'cleanup': 'root',
      'configurations': [configuration],
      'data': self._files,
      'env_vars': self.env,
      'test_case_name': self.task_name,
      'tests': self._tasks,
    }
    # No whitespace after separators to keep the payload as small as possible.
    return json.dumps(request, sort_keys=True, separators=(',',':'))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000145
Marc-Antoine Ruelcd629732013-12-20 15:00:42 -0500146
class TaskOutputCollector(object):
  """Fetches task output from isolate server to local disk.

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.

  Per-shard results and the lazily created Storage object are guarded by an
  internal lock. Call 'finalize' once all shards are processed to write
  summary.json and release the Storage.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # Guards |_per_shard_results| and |_storage|.
    self._lock = threading.Lock()
    # Maps shard index -> result dict of that shard.
    self._per_shard_results = {}
    # Created on first use by _get_storage(), closed by finalize().
    self._storage = None

    if not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Called concurrently from multiple threads.

    Args:
      shard_index: integer index of the shard, expected to be in range
          [0, shard_count). Out of range indexes are logged and ignored.
      result: result dict of the shard; its 'output' value is scanned for the
          location of output files to fetch.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if shard_index < 0 or shard_index >= self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary. This happens outside of the lock, so
    # several shards can be fetched in parallel.
    isolated_files_location = extract_output_files_location(result['output'])
    if isolated_files_location:
      isolate_server, namespace, isolated_hash = isolated_files_location
      # |storage| is None when this shard disagrees with previous shards about
      # the isolate server or namespace; the files are not fetched then.
      storage = self._get_storage(isolate_server, namespace)
      if storage:
        # Output files are supposed to be small and they are not reused across
        # tasks. So use MemoryCache for them instead of on-disk cache. Make
        # files writable, so that calling script can delete them.
        # Files of each shard land in a subdirectory named after its index.
        isolateserver.fetch_isolated(
            isolated_hash,
            storage,
            isolateserver.MemoryCache(file_mode_mask=0700),
            os.path.join(self.task_output_dir, str(shard_index)),
            False)

  def finalize(self):
    """Writes summary.json, shutdowns underlying Storage."""
    with self._lock:
      # Write an array of shard results with None for missing shards.
      summary = {
        'task_name': self.task_name,
        'shards': [
          self._per_shard_results.get(i) for i in xrange(self.shard_count)
        ],
      }
      tools.write_json(
          os.path.join(self.task_output_dir, 'summary.json'),
          summary,
          False)
      if self._storage:
        self._storage.close()
        self._storage = None

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    The Storage is created on the first call and reused afterwards. Returns
    None (after logging an error) if |isolate_server| or |namespace| differ
    from the ones the existing Storage was created with.
    """
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
      else:
        # Shards must all use exact same isolate server and namespace.
        if self._storage.location != isolate_server:
          logging.error(
              'Task shards are using multiple isolate servers: %s and %s',
              self._storage.location, isolate_server)
          return None
        if self._storage.namespace != namespace:
          logging.error(
              'Task shards are using multiple namespaces: %s and %s',
              self._storage.namespace, namespace)
          return None
      return self._storage
245
246
def now():
  """Returns the current time as given by time.time().

  Kept as a standalone function so that it can be mocked easily.
  """
  current = time.time()
  return current
250
251
def get_task_keys(swarm_base_url, task_name):
  """Returns the Swarming task key for each shard of task_name.

  Queries the server repeatedly, up to net.URL_OPEN_MAX_ATTEMPTS times, while
  it keeps answering that no task matches.

  Raises:
    Failure if the request fails or no matching task is ever found.
  """
  def not_found():
    # Builds the exception raised on both failure paths.
    return Failure(
        'Error: Unable to find any task with the name, %s, on swarming server'
        % task_name)

  query = urllib.urlencode([('name', task_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, query)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    response = net.url_read(url, retry_404=True)
    if response is None:
      raise not_found()

    # TODO(maruel): Compare exact string.
    if 'No matching' not in response:
      return json.loads(response)

    # The task may not be registered yet; let the retry loop try again.
    logging.warning('Unable to find any task with the name, %s, on swarming '
                    'server' % task_name)

  raise not_found()
maruel@chromium.org0437a732013-08-27 16:05:52 +0000274
275
def extract_output_files_location(task_log):
  """Parses the location of task output files out of a task log.

  The log is searched for a JSON dict wrapped in [run_isolated_out_hack] tags,
  with 'hash', 'namespace' and 'storage' string values.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if found is None:
    return None
  payload = found.group(1)

  def to_ascii(val):
    # Only strings are acceptable; non-ASCII ones fail with a ValueError
    # subclass raised by encode().
    if isinstance(val, basestring):
      return val.encode('ascii')
    raise ValueError()

  try:
    data = json.loads(payload)
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash, namespace, isolate_server = [
      to_ascii(data[key]) for key in ('hash', 'namespace', 'storage')
    ]
    if file_path.is_url(isolate_server):
      return (isolate_server, namespace, isolated_hash)
    raise ValueError()
  except (KeyError, ValueError):
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', payload)
    return None
312
313
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Retrieves results for a single task_key.

  Polls '<base_url>/get_result' until the server returns a JSON dict with a
  non-empty 'output' value, the timeout expires or |should_stop| is set.

  Args:
    base_url: URL of the swarming server.
    shard_index: index of the shard, forwarded to |output_collector|.
    task_key: key of the task to fetch results of.
    timeout: float, how long to wait for the result; a zero value disables
        the deadline.
    should_stop: event object (is_set()/wait() are used) that aborts the
        polling once set.
    output_collector: optional object whose process_shard_result() is called
        with the result once it is available.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  params = [('r', task_key)]
  result_url = '%s/get_result?%s' % (base_url, urllib.urlencode(params))
  started = now()
  # A falsy timeout means "wait forever".
  deadline = started + timeout if timeout else None
  attempt = 0

  # Falling out of this loop (|should_stop| was set) implicitly returns None.
  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      # Never sleep past the deadline.
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        # Waiting on the event (instead of sleeping) wakes up as soon as the
        # caller asks to stop.
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read, since we are doing retries
    # ourselves. Do not use retry_404 so should_stop is polled more often.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)

    # Request failed. Try again.
    if response is None:
      continue

    # Got some response, ensure it is JSON dict, retry if not.
    try:
      # A falsy JSON value (null, empty list, ...) is treated as an empty dict.
      result = json.loads(response) or {}
      if not isinstance(result, dict):
        raise ValueError()
    except (ValueError, TypeError):
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, response)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if result.get('output'):
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
maruel@chromium.org0437a732013-08-27 16:05:52 +0000378
379
def yield_results(
    swarm_base_url, task_keys, timeout, max_threads,
    print_status_updates, output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  print_status_updates, if true, makes the function print the list of shards
  still being waited for each time STATUS_UPDATE_INTERVAL elapses without a
  result.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  # One thread per shard unless |max_threads| caps it.
  number_threads = (
      min(max_threads, len(task_keys)) if max_threads else len(task_keys))
  # Shared with every retrieve_results() call; set in 'finally' below to make
  # the still running calls return early.
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      # A helper function is used so that each lambda captures its own
      # |shard_index| rather than the loop variable.
      def enqueue_retrieve_results(shard_index, task_key):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn),
            swarm_base_url, shard_index, task_key, timeout,
            should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_key in enumerate(task_keys):
        enqueue_retrieve_results(shard_index, task_key)

      # Wait for all of them to finish.
      shards_remaining = range(len(task_keys))
      active_task_count = len(task_keys)
      while active_task_count:
        shard_index, result = None, None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # Nothing finished during the interval: report and keep waiting. The
          # number of active tasks is unchanged.
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          # |result| stays None, so the task is counted as failed below.
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
452
453
def setup_run_isolated(manifest, bundle):
  """Configures the manifest to run an isolated task through run_isolated.py.

  Both arguments are modified in place: files are added to |bundle| and two
  commands ('Run Test' and 'Clean Up') are added to |manifest|.

  Args:
    manifest: Manifest with swarm task definition.
    bundle: ZipPackage with files that would be transferred to swarm bot.
        If None, only |manifest| is modified (useful in tests).
  """
  runner_zip = 'run_isolated.zip'
  cleanup_script = 'swarm_cleanup.py'

  if bundle:
    # Add uncompressed zip here. It'll be compressed as part of the package
    # sent to Swarming server.
    if runner_zip not in bundle.files:
      bundle.add_buffer(
          runner_zip,
          run_isolated.get_as_zip_package().zip_into_buffer(compress=False))
    if cleanup_script not in bundle.files:
      bundle.add_file(
          os.path.join(TOOLS_PATH, cleanup_script), cleanup_script)

  # An URL is passed as an isolate server, anything else is treated as a
  # local directory.
  if file_path.is_url(manifest.isolate_server):
    server_flag = '--isolate-server'
  else:
    server_flag = '--indir'

  command = [
    'python', runner_zip,
    '--hash', manifest.isolated_hash,
    '--namespace', manifest.namespace,
    server_flag, manifest.isolate_server,
  ]

  # Have it print the profiling section.
  if manifest.verbose or manifest.profile:
    command.append('--verbose')

  # Pass all extra args for run_isolated.py, it will pass them to the command.
  if manifest.extra_args:
    command.append('--')
    command.extend(manifest.extra_args)

  manifest.add_task('Run Test', command)
  manifest.add_task('Clean Up', ['python', cleanup_script])
501
502
def setup_googletest(env, shards, index):
  """Returns |env| with googletest sharding variables set when sharded.

  With a single shard (or less) |env| is returned as is. Otherwise a copy is
  returned with GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS added; |env| itself
  is never modified.
  """
  if shards <= 1:
    return env
  sharded_env = env.copy()
  sharded_env.update(
      GTEST_SHARD_INDEX=str(index),
      GTEST_TOTAL_SHARDS=str(shards))
  return sharded_env
510
511
def archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the CAC.

  Runs isolate.py (located next to this script) in a subprocess: with the
  'archive' command when |isolate_server| is an URL, with the 'hashtable'
  command otherwise, in which case |isolate_server| is passed as --outdir.

  Args:
    isolate_server: isolate server URL, or an output directory.
    namespace: isolate server namespace to use.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm, forwarded to isolateserver.hash_file().
    verbose: integer verbosity level; '--verbose' is passed that many times to
        isolate.py.

  Returns:
    Hash of the .isolated file on success, None if isolate.py failed.
  """
  logging.info('archive(%s, %s, %s)', isolate_server, namespace, isolated)
  if file_path.is_url(isolate_server):
    command = 'archive'
    flag = '--isolate-server'
  else:
    command = 'hashtable'
    flag = '--outdir'

  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    command,
    flag, isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Do not pass |verbose| to subprocess.call(): its second positional argument
  # is the buffer size of the child's pipes, not a verbosity switch.
  if subprocess.call(cmd):
    # Non-zero exit code: archival failed.
    return None
  return isolateserver.hash_file(isolated, algo)
541
542
def get_shard_task_name(task_name, shards, index):
  """Returns the name of one shard of a task.

  A task made of exactly one shard keeps its name; otherwise the shard count
  and the shard index are appended, separated by colons.
  """
  is_sharded = shards != 1
  return '%s:%s:%s' % (task_name, shards, index) if is_sharded else task_name
548
549
def upload_zip_bundle(isolate_server, bundle):
  """Zips |bundle|, uploads it to isolate storage and returns its fetch URL.

  Args:
    isolate_server: URL of an isolate server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from on success.
    None on failure (IOError or OSError while zipping or uploading).
  """
  # Swarming bot would need to be able to grab the file from the storage
  # using raw HTTP GET. Use 'default' namespace so that the raw data returned
  # to a bot is not zipped, since swarm_bot doesn't understand compressed
  # data yet. This namespace has nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  try:
    began = now()
    item = isolateserver.BufferItem(
        bundle.zip_into_buffer(), high_priority=True)
    with isolateserver.get_storage(isolate_server, 'default') as storage:
      pushed = storage.upload_items([item])
      fetch_url = storage.get_fetch_url(item)
    duration = now() - began
  except (IOError, OSError) as exc:
    tools.report_error('Failed to upload the zip file: %s' % exc)
    return None

  # The item is reported back only if it actually had to be uploaded.
  if item in pushed:
    outcome = 'Upload complete, time elapsed: %f'
  else:
    outcome = 'Zip file already on server, time elapsed: %f'
  logging.info(outcome, duration)
  return fetch_url
583
584
def trigger_by_manifest(swarming, manifest):
  """Given a task manifest, triggers it for execution on swarming.

  Posts the JSON form of the manifest to '<swarming>/test' and parses the JSON
  response.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    tuple(Task id, priority) on success. tuple(None, None) on failure.
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  result = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not result:
    tools.report_error('Failed to trigger task %s' % manifest.task_name)
    # Always return a 2-tuple: callers unpack the result unconditionally.
    return None, None
  try:
    data = json.loads(result)
  except (ValueError, TypeError) as e:
    msg = '\n'.join((
        'Failed to trigger task %s' % manifest.task_name,
        'Manifest: %s' % manifest_text,
        'Bad response: %s' % result,
        str(e)))
    tools.report_error(msg)
    return None, None
  if not data:
    return None, None
  # Each manifest describes a single shard, so only the first key is relevant.
  return data['test_keys'][0]['test_key'], data['priority']
Vadim Shtayurab450c602014-05-12 19:23:25 -0700614
615
def abort_task(_swarming, _manifest):
  """Given a task manifest that was triggered, aborts its execution.

  Currently does nothing: both arguments are ignored and None is returned.
  """
  # TODO(vadimsh): Not supported by the server yet.
619
620
def trigger_task_shards(
    swarming, isolate_server, namespace, isolated_hash, task_name, extra_args,
    shards, dimensions, env, deadline, verbose, profile, priority):
  """Triggers multiple subtasks of a sharded task.

  One Manifest is built per shard; they all share a single zip bundle that is
  uploaded to the isolate server before any shard is triggered.

  Arguments:
    swarming: URL of the Swarming server.
    isolate_server: isolate server the bundle is uploaded to.
    namespace: isolate namespace, forwarded to each Manifest.
    isolated_hash: hash of the .isolated file to run.
    task_name: base task name; each shard gets its own derived name.
    extra_args: extra command line arguments, forwarded to each Manifest.
    shards: number of shards to trigger.
    dimensions: bot dimensions, forwarded to each Manifest.
    env: base environment; per-shard values are added by setup_googletest().
    deadline, verbose, profile, priority: forwarded to each Manifest.

  Returns:
    dict(task_name: task_id). None in case of failure.
  """
  # Collects all files that are necessary to bootstrap a task execution
  # on the bot. Usually it includes self contained run_isolated.zip and
  # a bunch of small other scripts. All heavy files are pulled
  # by run_isolated.zip. Updated in 'setup_run_isolated'.
  bundle = zip_package.ZipPackage(ROOT_DIR)

  # Make a separate Manifest for each shard, put shard index and number of
  # shards into env and subtask name.
  manifests = []
  for index in xrange(shards):
    manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=get_shard_task_name(task_name, shards, index),
        extra_args=extra_args,
        dimensions=dimensions,
        env=setup_googletest(env, shards, index),
        deadline=deadline,
        verbose=verbose,
        profile=profile,
        priority=priority)
    setup_run_isolated(manifest, bundle)
    manifests.append(manifest)

  # Upload zip bundle file to get its URL.
  bundle_url = upload_zip_bundle(isolate_server, bundle)
  if not bundle_url:
    # Must be a plain None: callers only check the truthiness of the returned
    # value, and a (None, None) tuple would be mistaken for a success.
    return None

  # Attach that file to all manifests.
  for manifest in manifests:
    manifest.add_bundled_file('swarm_data.zip', bundle_url)

  # Trigger all the subtasks.
  tasks = {}
  priority_warning = False
  for manifest in manifests:
    # trigger_by_manifest() reports a failure either as a bare None or as
    # (None, None); normalize so the unpacking below cannot raise.
    task_id, actual_priority = (
        trigger_by_manifest(swarming, manifest) or (None, None))
    if not task_id:
      break
    # Warn only once even if the server reset the priority of every shard.
    if not priority_warning and actual_priority != manifest.priority:
      priority_warning = True
      print >> sys.stderr, 'Priority was reset to %s' % actual_priority
    tasks[manifest.task_name] = task_id

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(manifests):
    if tasks:
      print >> sys.stderr, 'Not all shards were triggered'
      for task_id in tasks.itervalues():
        abort_task(swarming, task_id)
    return None

  return tasks
maruel@chromium.org0437a732013-08-27 16:05:52 +0000684
685
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Resolves |arg| to the hash of an isolated file, archiving it if needed.

  |arg| is either a path ending with '.isolated', which gets archived, or an
  already computed hash, which is only validated against |algo|.

  Returns:
    tuple(hash, is_file). The hash is None on failure (an error is reported);
    is_file is True when |arg| was a .isolated file and False when it was
    treated as a hash.
  """
  if arg.endswith('.isolated'):
    digest = archive(isolate_server, namespace, arg, algo, verbose)
    if digest:
      return digest, True
    tools.report_error('Archival failure %s' % arg)
    return None, True

  if not isolateserver.is_valid_hash(arg, algo):
    tools.report_error('Invalid hash %s' % arg)
    return None, False
  return arg, False
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500703
704
def trigger(
    swarming,
    isolate_server,
    namespace,
    file_hash_or_isolated,
    task_name,
    extra_args,
    shards,
    dimensions,
    env,
    deadline,
    verbose,
    profile,
    priority):
  """Sends off the hash swarming task requests.

  |file_hash_or_isolated| is first resolved to a hash with isolated_to_hash().
  When |task_name| is empty, a name is generated from the isolated file base
  name (or the current user name when a hash was given), the dimensions, the
  hash and the current time.

  Returns:
    tuple(dict(task_name: task_id), base task name). The dict of tasks is None
    in case of failure.
  """
  file_hash, is_file = isolated_to_hash(
      isolate_server, namespace, file_hash_or_isolated, hashlib.sha1, verbose)
  if not file_hash:
    # The first item must be falsy: callers decide whether triggering worked
    # with "if not tasks", so a non-zero exit code here would read as success.
    return None, ''
  if not task_name:
    # If a file name was passed, use the base name of the isolated file.
    # Otherwise, use user name as an approximation of a task name.
    if is_file:
      key = os.path.splitext(os.path.basename(file_hash_or_isolated))[0]
    else:
      key = getpass.getuser()
    task_name = '%s/%s/%s/%d' % (
        key,
        '_'.join('%s=%s' % (k, v) for k, v in sorted(dimensions.iteritems())),
        file_hash,
        now() * 1000)

  tasks = trigger_task_shards(
      swarming=swarming,
      isolate_server=isolate_server,
      namespace=namespace,
      isolated_hash=file_hash,
      task_name=task_name,
      extra_args=extra_args,
      shards=shards,
      dimensions=dimensions,
      deadline=deadline,
      env=env,
      verbose=verbose,
      profile=profile,
      priority=priority)
  return tasks, task_name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000757
758
def decorate_shard_output(shard_index, result, shard_exit_code):
  """Returns wrapped output for swarming task shard.

  Arguments:
    shard_index: index of the shard, used in the header and footer.
    result: dict with the shard result; 'machine_id' and 'output' are read,
        'machine_tag' is optional and defaults to 'unknown'.
    shard_exit_code: overall exit code of the shard, printed both in decimal
        and as an unsigned 32 bits hexadecimal value.
  """
  # The values are listed in the same order as the labels of the format
  # string: the machine tag first, then the machine id.
  tag = 'index %s (machine tag: %s, id: %s)' % (
      shard_index,
      result.get('machine_tag', 'unknown'),
      result['machine_id'])
  return (
      '\n'
      '================================================================\n'
      'Begin output from shard %s\n'
      '================================================================\n'
      '\n'
      '%s'
      '================================================================\n'
      'End output from shard %s.\nExit code %d (%s).\n'
      '================================================================\n') % (
          tag, result['output'] or NO_OUTPUT_FOUND, tag,
          shard_exit_code, hex(0xffffffff & shard_exit_code))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000777
778
def collect(
    url, task_name, shards, timeout, decorate,
    print_status_updates, task_output_dir):
  """Retrieves results of a Swarming task.

  Arguments:
    url: URL of the Swarming server.
    task_name: base task name; per-shard names are derived from it.
    shards: number of shards the task was triggered with.
    timeout: forwarded to yield_results().
    decorate: if True, print each shard output wrapped by
        decorate_shard_output(), otherwise print a compact form.
    print_status_updates: forwarded to yield_results().
    task_output_dir: if set, shard outputs are also gathered there through a
        TaskOutputCollector.

  Returns:
    0 if results were received from every shard and all exit codes are zero,
    1 otherwise.

  Raises:
    Failure if a shard doesn't resolve to exactly one task key.
  """
  # Grab task keys for each shard. Order is important, used to figure out
  # shard index based on the key.
  # TODO(vadimsh): Simplify this once server support is added.
  task_keys = []
  for index in xrange(shards):
    shard_task_name = get_shard_task_name(task_name, shards, index)
    logging.info('Collecting %s', shard_task_name)
    shard_task_keys = get_task_keys(url, shard_task_name)
    if not shard_task_keys:
      raise Failure('No task keys to get results with: %s' % shard_task_name)
    if len(shard_task_keys) != 1:
      raise Failure('Expecting only one shard for a task: %s' % shard_task_name)
    task_keys.append(shard_task_keys[0])

  # Collect output files only if explicitly asked with --task-output-dir option.
  output_collector = None
  if task_output_dir:
    output_collector = TaskOutputCollector(
        task_output_dir, task_name, len(task_keys))

  seen_shards = set()
  exit_codes = []

  try:
    for index, output in yield_results(
        url, task_keys, timeout, None, print_status_updates, output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. Missing
      # exit codes are treated as a failure ('1').
      shard_exit_code = 0
      for code in map(int, (output['exit_codes'] or '1').split(',')):
        if code:
          shard_exit_code = code
          break
      exit_codes.append(shard_exit_code)

      if decorate:
        print(decorate_shard_output(index, output, shard_exit_code))
      else:
        # Be as tolerant as decorate_shard_output(): 'machine_tag' may be
        # absent and 'output' may be None.
        print(
            '%s/%s: %s' % (
                output['machine_id'],
                output.get('machine_tag', 'unknown'),
                output['exit_codes']))
        print(''.join(
            ' %s\n' % l for l in (output['output'] or '').splitlines()))
  finally:
    # Flush whatever was collected, even when interrupted by an exception.
    if output_collector:
      output_collector.finalize()

  if len(seen_shards) != len(task_keys):
    missing_shards = [x for x in range(len(task_keys)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return int(any(exit_codes))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000840
841
def add_filter_options(parser):
  """Adds the 'Filtering slaves' option group, exposed as parser.filter_group.

  The group holds a repeatable -d/--dimension option taking two values, stored
  in options.dimensions.
  """
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  parser.filter_group = group
  group.add_option(
      '-d', '--dimension',
      default=[],
      action='append',
      nargs=2,
      dest='dimensions',
      metavar='FOO bar',
      help='dimension to filter on')
  parser.add_option_group(group)
849
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400850
def process_filter_options(parser, options):
  """Converts options.dimensions into a dict and requires it to be non-empty.

  Calls parser.error() when no dimension was specified.
  """
  options.dimensions = dict(options.dimensions)
  if options.dimensions:
    return
  parser.error('Please at least specify one --dimension')
855
856
def add_sharding_options(parser):
  """Adds the 'Sharding options' group, exposed as parser.sharding_group.

  The group holds the integer --shards option, which defaults to 1.
  """
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  parser.sharding_group = group
  group.add_option(
      '--shards',
      type='int',
      default=1,
      help='Number of shards to trigger and collect.')
  parser.add_option_group(group)
863
864
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  That is the isolate server options, the filtering options, a
  'Task properties' group exposed as parser.task_group, and --profile in the
  logging group.
  """
  isolateserver.add_isolate_server_options(parser, True)
  add_filter_options(parser)

  task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group = task_group
  task_group.add_option(
      '-e', '--env',
      default=[],
      action='append',
      nargs=2,
      metavar='FOO bar',
      help='Environment variables to set')
  task_group.add_option(
      '--priority',
      type='int',
      default=100,
      help='The lower value, the more important the task is')
  task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. It uniquely identifies the task. '
           'Defaults to <base_name>/<dimensions>/<isolated hash>/<timestamp> '
           'if an isolated file is provided, if a hash is provided, it '
           'defaults to <user>/<dimensions>/<isolated hash>/<timestamp>')
  task_group.add_option(
      '--deadline',
      type='int',
      default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  parser.add_option_group(task_group)

  # TODO(maruel): This is currently written in a chromium-specific way.
  parser.group_logging.add_option(
      '--profile',
      action='store_true',
      default=bool(os.environ.get('ISOLATE_DEBUG')),
      help='Have run_isolated.py print profiling info')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000893
894
def process_trigger_options(parser, options, args):
  """Validates the options registered by add_trigger_options().

  Processes the isolate server options first, then requires exactly one
  positional argument, then processes the filtering options.
  """
  isolateserver.process_isolate_server_options(parser, options)
  arg_count = len(args)
  if arg_count != 1:
    parser.error('Must pass one .isolated file or its hash (sha1).')
  process_filter_options(parser, options)
Marc-Antoine Ruel025e7822014-05-01 11:50:24 -0400899 process_filter_options(parser, options)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000900
901
def add_collect_options(parser):
  """Adds the options used to collect the results of a task.

  Registers --timeout in the server group, --decorate and
  --print-status-updates in the logging group, and --task-output-dir in a new
  'Task output' group exposed as parser.task_output_group.
  """
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=DEFAULT_SHARD_WAIT_TIME,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')

  logging_group = parser.group_logging
  logging_group.add_option(
      '--decorate', action='store_true', help='Decorate output')
  logging_group.add_option(
      '--print-status-updates',
      action='store_true',
      help='Print periodic status updates')

  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group = output_group
  output_group.add_option(
      '--task-output-dir',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains <task-output-dir>/summary.json file with '
           'a summary of task results across all shards, and per-shard '
           'directory with output files produced by a shard: '
           '<task-output-dir>/<zero-based-shard-index>/')
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000923
924
def extract_isolated_command_extra_args(args):
  """Splits |args| at the first '--' separator.

  Returns:
    tuple(arguments before '--', arguments after '--'). When there is no
    '--', |args| itself is returned along with an empty list.
  """
  if '--' not in args:
    return (args, [])
  separator = args.index('--')
  before = args[:separator]
  after = args[separator + 1:]
  return (before, after)
931
932
@subcommand.usage('task_name')
def CMDcollect(parser, args):
  """Retrieves results of a Swarming task.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)

  # Exactly one positional argument, the task name, is accepted.
  if not args:
    parser.error('Must specify one task name.')
  if len(args) > 1:
    parser.error('Must specify only one task name.')
  task_name = args[0]

  try:
    return collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +0000960
961
def _bot_has_dimension(dimensions, key, value):
  """Returns True if a bot's |dimensions| satisfy the requested key/value."""
  if key not in dimensions:
    return False
  available = dimensions[key]
  # A bot can have multiple value for a key, for example,
  # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
  # be accepted.
  if isinstance(available, list):
    return value in available
  return value == available


def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  filter_group = parser.filter_group
  filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  service = net.get_http_service(options.swarming)
  data = service.json_request('GET', '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # A bot is considered dead once it was not seen for longer than the timeout
  # reported by the server.
  timeout = datetime.timedelta(seconds=data['machine_death_timeout'])
  utcnow = datetime.datetime.utcnow()
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['id']):
    last_seen = datetime.datetime.strptime(
        machine['last_seen'], '%Y-%m-%d %H:%M:%S')
    is_dead = utcnow - last_seen > timeout
    if options.dead_only:
      skip = not is_dead
    else:
      skip = is_dead and not options.keep_dead
    if skip:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = machine['dimensions']
    if not all(
        _bot_has_dimension(dimensions, key, value)
        for key, value in options.dimensions):
      continue

    print(machine['id'])
    if not options.bare:
      print(' %s' % dimensions)
  return 0
1015
1016
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  # Everything after '--' is meant for the isolated command, not this parser.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)
  hash_or_isolated = args[0]

  try:
    tasks, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=hash_or_isolated,
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
  except Failure as e:
    tools.report_error(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, hash_or_isolated, e.args[0]))
    return 1

  if not tasks:
    tools.report_error('Failed to trigger the task.')
    return 1

  # Only worth printing when the name was generated rather than user provided.
  if task_name != options.task_name:
    print('Triggered task: %s' % task_name)

  try:
    # TODO(maruel): Use task_ids, it's much more efficient!
    return collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001068
1069
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive, packages it if needed and sends a
  Swarming manifest file to the Swarming server.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  # Everything after '--' is meant for the isolated command, not this parser.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  try:
    tasks, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
    if not tasks:
      return 1
    # Only worth printing when the name was generated rather than provided.
    if task_name != options.task_name:
      print('Triggered task: %s' % task_name)
    if options.dump_json:
      details = {
        'base_task_name': task_name,
        'tasks': tasks,
      }
      tools.write_json(options.dump_json, details, True)
    return 0
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001121
1122
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser that adds the 'Server' group and the auth options.

  The 'Server' group, exposed as self.server_group, holds -S/--swarming, whose
  default comes from the SWARMING_SERVER environment variable.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group = server_group
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL',
        default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses the arguments; --swarming is mandatory, trailing '/' removed."""
    parsed = tools.OptionParserWithLogging.parse_args(self, *args, **kwargs)
    options, remaining = parsed
    options.swarming = options.swarming.rstrip('/')
    if not options.swarming:
      self.error('--swarming is required.')
    auth.process_auth_options(self, options)
    return options, remaining
1143
1144
def main(args):
  """Runs the subcommand dispatcher on |args| and returns the exit code.

  Any exception escaping the subcommand is reported and turned into exit
  code 1.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    parser = OptionParserSwarming(version=__version__)
    exit_code = dispatcher.execute(parser, args)
  except Exception as e:
    tools.report_error(e)
    exit_code = 1
  return exit_code
1152
1153
# Script entry point: set up the process, then exit with main()'s return code.
if __name__ == '__main__':
  # Process-wide setup done once, before any argument is parsed.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  # The program name (argv[0]) is not passed to main().
  sys.exit(main(sys.argv[1:]))