blob: bf5b950960df7469311c51ced41b993de7a38962 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2013 The Swarming Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0 that
# can be found in the LICENSE file.

"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Vadim Shtayurab450c602014-05-12 19:23:25 -07008__version__ = '0.4.8'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040010import datetime
Marc-Antoine Ruel5b475782014-02-14 20:57:59 -050011import getpass
maruel@chromium.org0437a732013-08-27 16:05:52 +000012import hashlib
13import json
14import logging
15import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070016import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import shutil
maruel@chromium.org0437a732013-08-27 16:05:52 +000018import subprocess
19import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070020import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000021import time
22import urllib
maruel@chromium.org0437a732013-08-27 16:05:52 +000023
24from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000027
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050028from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040029from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
maruel@chromium.org0437a732013-08-27 16:05:52 +000031from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000032from utils import tools
33from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
maruel@chromium.org7b844a62013-09-17 13:04:59 +000036import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000037import run_isolated
38
39
# Directory containing this script, and its 'tools' subdirectory.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOLS_PATH = os.path.join(ROOT_DIR, 'tools')


# The default time to wait for a shard to finish running (presumably seconds,
# i.e. 80 minutes).
DEFAULT_SHARD_WAIT_TIME = 80 * 60.0

# How often to print status updates to stdout in 'collect' (presumably seconds,
# i.e. 15 minutes).
STATUS_UPDATE_INTERVAL = 15 * 60.0


# Message text stating that a task did not produce any output.
NO_OUTPUT_FOUND = '\n'.join((
    'No output produced by the task, it may have failed to run.', '', ''))
54
55
class Failure(Exception):
  """Generic error type raised by this tool."""
59
60
class Manifest(object):
  """Holds the description of one Swarming task and serializes it to JSON."""

  def __init__(
      self, isolate_server, namespace, isolated_hash, task_name, extra_args,
      env, dimensions, deadline, verbose, profile,
      priority):
    """Initializes the manifest.

    Args:
      isolate_server - isolate server url.
      namespace - isolate server namespace to use.
      isolated_hash - the manifest's sha-1 that the slave is going to fetch.
      task_name - the name to give the task request.
      extra_args - additional arguments to pass to isolated command; may be
          None or empty.
      env - environment variables to set; copied via its copy() method.
      dimensions - dimensions to filter the task on; copied via its copy()
          method.
      deadline - maximum pending time before this task expires.
      verbose - if True, have the slave print more details.
      profile - if True, have the slave print more timing data.
      priority - int between 0 and 1000, lower the higher priority.
    """
    # Entries accumulated by add_bundled_file() and add_task().
    self._files = []
    self._tasks = []

    # Where the isolated task lives.
    self.isolate_server = isolate_server
    self.namespace = namespace
    self.isolated_hash = isolated_hash

    # What to run and how.
    self.task_name = task_name
    self.extra_args = tuple(extra_args) if extra_args else ()
    # Copies, so later changes by the caller do not leak into the manifest.
    self.env = env.copy()
    self.dimensions = dimensions.copy()
    self.deadline = deadline
    self.priority = priority

    # Flags are normalized to real booleans.
    self.verbose = bool(verbose)
    self.profile = bool(profile)

  def add_task(self, task_name, actions, time_out=2*60*60):
    """Appends a new task as a TestObject to the swarming manifest file.

    Tasks cannot be added once the manifest was uploaded.

    By default, command will be killed after 2 hours of execution.

    See TestObject in services/swarming/src/common/test_request_message.py for
    the valid format.
    """
    task = {
      'action': actions,
      'decorate_output': self.verbose,
      'hard_time_out': time_out,
      'test_name': task_name,
    }
    self._tasks.append(task)

  def add_bundled_file(self, file_name, file_url):
    """Registers a file as a [file_url, file_name] entry of the manifest.

    File will be downloaded and extracted by the swarm bot before launching the
    task.
    """
    entry = [file_url, file_name]
    self._files.append(entry)

  def to_json(self):
    """Returns the manifest as a compact JSON string with sorted keys.

    The actual serialization format is defined as a TestCase object as described
    in services/swarming/src/common/test_request_message.py
    """
    # Is a TestConfiguration.
    configuration = {
      'config_name': 'isolated',
      'deadline_to_run': self.deadline,
      'dimensions': self.dimensions,
      'priority': self.priority,
    }
    payload = {
      'cleanup': 'root',
      'configurations': [configuration],
      'data': self._files,
      'env_vars': self.env,
      'test_case_name': self.task_name,
      'tests': self._tasks,
    }
    return json.dumps(payload, sort_keys=True, separators=(',', ':'))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000145
Marc-Antoine Ruelcd629732013-12-20 15:00:42 -0500146
class TaskOutputCollector(object):
  """Collects per-shard results and downloads task output files to disk.

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # |_lock| guards |_per_shard_results| and |_storage|.
    self._lock = threading.Lock()
    self._per_shard_results = {}
    self._storage = None

    if not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Called concurrently from multiple threads.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if not 0 <= shard_index < self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Remember the result dict of that shard; only the first one counts.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Nothing more to do unless the log points at output files.
    location = extract_output_files_location(result['output'])
    if not location:
      return
    isolate_server, namespace, isolated_hash = location
    storage = self._get_storage(isolate_server, namespace)
    if not storage:
      return

    # Output files are supposed to be small and they are not reused across
    # tasks. So use MemoryCache for them instead of on-disk cache. Make
    # files writable, so that calling script can delete them.
    cache = isolateserver.MemoryCache(file_mode_mask=0o700)
    destination = os.path.join(self.task_output_dir, str(shard_index))
    isolateserver.fetch_isolated(
        isolated_hash, storage, cache, destination, False)

  def finalize(self):
    """Writes summary.json, shutdowns underlying Storage."""
    with self._lock:
      # One entry per expected shard, None where no result was recorded.
      shards = [
        self._per_shard_results.get(i) for i in xrange(self.shard_count)
      ]
      summary = {
        'task_name': self.task_name,
        'shards': shards,
      }
      summary_path = os.path.join(self.task_output_dir, 'summary.json')
      tools.write_json(summary_path, summary, False)
      if self._storage:
        self._storage.close()
        self._storage = None

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    The storage is created on first use. Returns None (after logging an error)
    when a later call asks for a different server or namespace.
    """
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
        return self._storage

      # Shards must all use exact same isolate server and namespace.
      if self._storage.location != isolate_server:
        logging.error(
            'Task shards are using multiple isolate servers: %s and %s',
            self._storage.location, isolate_server)
        return None
      if self._storage.namespace != namespace:
        logging.error(
            'Task shards are using multiple namespaces: %s and %s',
            self._storage.namespace, namespace)
        return None
      return self._storage
245
246
def now():
  """Returns time.time(); a separate function so tests can mock it."""
  current = time.time()
  return current
250
251
def get_task_keys(swarm_base_url, task_name):
  """Returns the Swarming task key for each shards of task_name.

  Queries '<swarm_base_url>/get_matching_test_cases' and returns the decoded
  JSON response. Raises Failure when the request fails or when no attempt of
  the retry loop finds a matching task.
  """
  query = urllib.urlencode([('name', task_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, query)

  def not_found():
    # Built lazily so the message is only formatted when actually needed.
    return Failure(
        'Error: Unable to find any task with the name, %s, on swarming server'
        % task_name)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    response = net.url_read(url, retry_404=True)
    if response is None:
      raise not_found()

    # TODO(maruel): Compare exact string.
    if 'No matching' not in response:
      return json.loads(response)

    logging.warning('Unable to find any task with the name, %s, on swarming '
                    'server' % task_name)

  raise not_found()
maruel@chromium.org0437a732013-08-27 16:05:52 +0000274
275
def extract_output_files_location(task_log):
  """Parses the location of task output files out of a task log.

  Looks for a JSON dict wrapped in [run_isolated_out_hack] markers.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not found:
    return None
  payload = found.group(1)

  def to_ascii(val):
    # Only strings are acceptable; encoding failures surface as ValueError.
    if isinstance(val, basestring):
      return val.encode('ascii')
    raise ValueError()

  try:
    data = json.loads(payload)
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash, namespace, isolate_server = [
      to_ascii(data[key]) for key in ('hash', 'namespace', 'storage')
    ]
    if not file_path.is_url(isolate_server):
      raise ValueError()
  except (KeyError, ValueError):
    logging.warning('Unexpected value of run_isolated_out_hack: %s', payload)
    return None
  return (isolate_server, namespace, isolated_hash)
312
313
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Retrieves results for a single task_key.

  Polls '<base_url>/get_result?r=<task_key>' until the response is a JSON dict
  with a non-empty 'output' value, the timeout expires or |should_stop| is set.

  Args:
    base_url: URL prefix the 'get_result' request is sent to.
    shard_index: index of the shard, passed through to |output_collector|.
    task_key: key of the task, sent as the 'r' query parameter.
    timeout: float, how long to keep polling, measured with now(). A falsy
        value (0.0) disables the deadline.
    should_stop: event-like object; polled with is_set() and used with wait()
        to sleep between attempts. Polling stops once it is set.
    output_collector: optional object whose process_shard_result() is called
        with (shard_index, result) before the result is returned.

  Returns:
    <result dict> on success.
    None on failure (timeout, or |should_stop| was set).
  """
  assert isinstance(timeout, float), timeout
  params = [('r', task_key)]
  result_url = '%s/get_result?%s' % (base_url, urllib.urlencode(params))
  started = now()
  # No deadline at all when |timeout| is falsy.
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      # Never sleep past the deadline.
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        # Doubles as an interruptible sleep: returns early if the event is set.
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read, since we are doing retries
    # ourselves. Do not use retry_404 so should_stop is polled more often.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)

    # Request failed. Try again.
    if response is None:
      continue

    # Got some response, ensure it is JSON dict, retry if not.
    try:
      # A falsy JSON value (e.g. null) is treated as an empty dict.
      result = json.loads(response) or {}
      if not isinstance(result, dict):
        raise ValueError()
    except (ValueError, TypeError):
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, response)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if result.get('output'):
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
maruel@chromium.org0437a732013-08-27 16:05:52 +0000378
379
def yield_results(
    swarm_base_url, task_keys, timeout, max_threads,
    print_status_updates, output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Args:
    swarm_base_url: base URL passed to retrieve_results() for every shard.
    task_keys: sequence of task keys; the position of a key is its shard index.
    timeout: passed unchanged to retrieve_results() for every shard.
    max_threads: upper bound on the thread pool size; if falsy, one thread per
        task key is used.
    print_status_updates: if true, prints the list of shards still pending to
        stdout each time STATUS_UPDATE_INTERVAL elapses without a new result.
    output_collector: passed unchanged to retrieve_results(); may be None.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  number_threads = (
      min(max_threads, len(task_keys)) if max_threads else len(task_keys))
  # Shared flag that asks all still-running retrieve_results() calls to stop.
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_key):
        # |shard_index| is a parameter of this helper, so each lambda captures
        # its own value rather than the loop variable below.
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn),
            swarm_base_url, shard_index, task_key, timeout,
            should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_key in enumerate(task_keys):
        enqueue_retrieve_results(shard_index, task_key)

      # Wait for all of them to finish.
      shards_remaining = range(len(task_keys))
      active_task_count = len(task_keys)
      while active_task_count:
        # Reset so a failed pull() below is seen as "no result".
        shard_index, result = None, None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # Nothing finished during this interval; keep waiting.
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          # Logged and then handled below as a shard without a result.
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
452
453
def setup_run_isolated(manifest, bundle):
  """Configures |manifest| to run an isolated task through run_isolated.py.

  Modifies |bundle| (by adding files) and |manifest| (by adding commands) in
  place.

  Args:
    manifest: Manifest with swarm task definition.
    bundle: ZipPackage with files that would be transferred to swarm bot.
        If None, only |manifest| is modified (useful in tests).
  """
  run_test_name = 'run_isolated.zip'
  cleanup_script_name = 'swarm_cleanup.py'

  if bundle:
    # Add uncompressed zip here. It'll be compressed as part of the package
    # sent to Swarming server.
    if run_test_name not in bundle.files:
      run_isolated_zip = run_isolated.get_as_zip_package().zip_into_buffer(
          compress=False)
      bundle.add_buffer(run_test_name, run_isolated_zip)
    if cleanup_script_name not in bundle.files:
      bundle.add_file(
          os.path.join(TOOLS_PATH, cleanup_script_name), cleanup_script_name)

  # A URL selects an isolate server, anything else is passed as --indir.
  if file_path.is_url(manifest.isolate_server):
    server_flag = '--isolate-server'
  else:
    server_flag = '--indir'
  run_cmd = [
    'python', run_test_name,
    '--hash', manifest.isolated_hash,
    '--namespace', manifest.namespace,
    server_flag, manifest.isolate_server,
  ]

  if manifest.verbose or manifest.profile:
    # Have it print the profiling section.
    run_cmd.append('--verbose')

  # Pass all extra args for run_isolated.py, it will pass them to the command.
  if manifest.extra_args:
    run_cmd += ['--'] + list(manifest.extra_args)

  manifest.add_task('Run Test', run_cmd)

  # Clean up
  manifest.add_task('Clean Up', ['python', cleanup_script_name])
501
502
def setup_googletest(env, shards, index):
  """Returns |env| with googletest sharding variables set when sharded.

  With a single shard |env| itself is returned unchanged; otherwise a copy with
  GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS (as strings) is returned.
  """
  if shards <= 1:
    return env
  sharded_env = env.copy()
  sharded_env['GTEST_SHARD_INDEX'] = str(index)
  sharded_env['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded_env
510
511
def archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the CAC.

  Runs isolate.py (located in ROOT_DIR) with the 'archive' command when
  |isolate_server| is a URL, or with the 'hashtable' command otherwise.

  Args:
    isolate_server: isolate server URL, or an output location when it is not a
        URL (passed as --outdir).
    namespace: value passed to isolate.py as --namespace.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm passed to isolateserver.hash_file().
    verbose: number of times '--verbose' is passed to isolate.py.

  Returns:
    The value of isolateserver.hash_file(isolated, algo) on success.
    None if isolate.py exited with a non-zero code.
  """
  logging.info('archive(%s, %s, %s)', isolate_server, namespace, isolated)
  if file_path.is_url(isolate_server):
    command = 'archive'
    flag = '--isolate-server'
  else:
    command = 'hashtable'
    flag = '--outdir'

  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    command,
    flag, isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Only the command is passed: a second positional argument would be
  # interpreted by subprocess as Popen's 'bufsize'.
  if subprocess.call(cmd):
    return None
  return isolateserver.hash_file(isolated, algo)
541
542
def get_shard_task_name(task_name, shards, index):
  """Returns the name of one shard: '<task_name>:<shards>:<index>'.

  A task with exactly one shard keeps |task_name| unchanged.
  """
  if shards != 1:
    return '%s:%s:%s' % (task_name, shards, index)
  return task_name
548
549
def upload_zip_bundle(isolate_server, bundle):
  """Zips |bundle|, uploads it to isolate storage and returns its fetch URL.

  Args:
    isolate_server: URL of an isolate server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from on success.
    None on failure.
  """
  # Swarming bot would need to be able to grab the file from the storage
  # using raw HTTP GET. Use 'default' namespace so that the raw data returned
  # to a bot is not zipped, since swarm_bot doesn't understand compressed
  # data yet. This namespace have nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  try:
    start_time = now()
    zipped = bundle.zip_into_buffer()
    item = isolateserver.BufferItem(zipped, high_priority=True)
    with isolateserver.get_storage(isolate_server, 'default') as storage:
      uploaded = storage.upload_items([item])
      bundle_url = storage.get_fetch_url(item)
    elapsed = now() - start_time
  except (IOError, OSError) as exc:
    tools.report_error('Failed to upload the zip file: %s' % exc)
    return None

  # An item missing from |uploaded| is reported as already on the server.
  if item in uploaded:
    message = 'Upload complete, time elapsed: %f'
  else:
    message = 'Zip file already on server, time elapsed: %f'
  logging.info(message, elapsed)
  return bundle_url
583
584
def trigger_by_manifest(swarming, manifest):
  """Posts a task manifest to '<swarming>/test' to trigger its execution.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    True on success, False on failure (no response, or a response that is not
    valid JSON).
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  response = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not response:
    tools.report_error('Failed to trigger task %s' % manifest.task_name)
    return False

  # The response only has to parse as JSON; its content is not inspected.
  try:
    json.loads(response)
  except (ValueError, TypeError) as e:
    details = [
      'Failed to trigger task %s' % manifest.task_name,
      'Manifest: %s' % manifest_text,
      'Bad response: %s' % response,
      str(e),
    ]
    tools.report_error('\n'.join(details))
    return False
  else:
    return True
612
613
def abort_by_manifest(_swarming, _manifest):
  """Aborts a task triggered from a manifest; currently a no-op."""
  # TODO(vadimsh): Not supported by the server yet.
  return None
617
618
def trigger_task_shards(
    swarming, isolate_server, namespace, isolated_hash, task_name, extra_args,
    shards, dimensions, env, deadline, verbose, profile, priority):
  """Triggers every shard of a sharded task.

  Builds one Manifest per shard, uploads the shared zip bundle once and then
  triggers the manifests one by one, stopping at the first failure.

  Returns:
    0 if all shards were triggered, 1 otherwise.
  """
  # Collects all files that are necessary to bootstrap a task execution
  # on the bot. Usually it includes self contained run_isolated.zip and
  # a bunch of small other scripts. All heavy files are pulled
  # by run_isolated.zip. Updated in 'setup_run_isolated'.
  bundle = zip_package.ZipPackage(ROOT_DIR)

  # Make a separate Manifest for each shard, put shard index and number of
  # shards into env and subtask name.
  manifests = []
  for index in xrange(shards):
    shard_manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=get_shard_task_name(task_name, shards, index),
        extra_args=extra_args,
        dimensions=dimensions,
        env=setup_googletest(env, shards, index),
        deadline=deadline,
        verbose=verbose,
        profile=profile,
        priority=priority)
    setup_run_isolated(shard_manifest, bundle)
    manifests.append(shard_manifest)

  # Upload zip bundle file to get its URL.
  bundle_url = upload_zip_bundle(isolate_server, bundle)
  if not bundle_url:
    return 1

  # Attach that file to all manifests.
  for shard_manifest in manifests:
    shard_manifest.add_bundled_file('swarm_data.zip', bundle_url)

  # Trigger all the subtasks, giving up on the first one that fails.
  triggered = []
  for shard_manifest in manifests:
    if not trigger_by_manifest(swarming, shard_manifest):
      break
    triggered.append(shard_manifest)

  if len(triggered) == len(manifests):
    return 0

  # Some shards weren't triggered. Abort everything.
  if triggered:
    print >> sys.stderr, 'Not all shards were triggered'
    for shard_manifest in triggered:
      abort_by_manifest(swarming, shard_manifest)
  return 1
674
675
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Resolves |arg| to an isolated hash, archiving a .isolated file if needed.

  Returns a tuple (hash, is_file). |hash| is None on failure; |is_file| is True
  when |arg| named a .isolated file and False when it was treated as a hash.
  """
  if not arg.endswith('.isolated'):
    # Not a file name, so it must already be a valid hash.
    if isolateserver.is_valid_hash(arg, algo):
      return arg, False
    tools.report_error('Invalid hash %s' % arg)
    return None, False

  digest = archive(isolate_server, namespace, arg, algo, verbose)
  if digest:
    return digest, True
  tools.report_error('Archival failure %s' % arg)
  return None, True
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500693
694
def trigger(
    swarming, isolate_server, namespace, file_hash_or_isolated, task_name,
    extra_args, shards, dimensions, env, deadline, verbose, profile, priority):
  """Resolves the isolated hash and sends off the swarming task requests.

  Returns a tuple (exit code, task name). The task name is '' when the hash
  could not be resolved; otherwise it is the given or the generated name.
  """
  file_hash, is_file = isolated_to_hash(
      isolate_server, namespace, file_hash_or_isolated, hashlib.sha1, verbose)
  if not file_hash:
    return 1, ''

  if not task_name:
    # Generate a name: the prefix is the base name of the .isolated file when
    # one was passed, otherwise the user name is used as an approximation.
    if is_file:
      prefix = os.path.splitext(os.path.basename(file_hash_or_isolated))[0]
    else:
      prefix = getpass.getuser()
    dimensions_tag = '_'.join(
        '%s=%s' % (k, v) for k, v in sorted(dimensions.iteritems()))
    task_name = '%s/%s/%s/%d' % (
        prefix, dimensions_tag, file_hash, now() * 1000)

  exit_code = trigger_task_shards(
      swarming=swarming,
      isolate_server=isolate_server,
      namespace=namespace,
      isolated_hash=file_hash,
      task_name=task_name,
      extra_args=extra_args,
      shards=shards,
      dimensions=dimensions,
      deadline=deadline,
      env=env,
      verbose=verbose,
      profile=profile,
      priority=priority)
  return exit_code, task_name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000742
743
def decorate_shard_output(shard_index, result, shard_exit_code):
  """Returns the output of a swarming task shard wrapped in a banner.

  Arguments:
    shard_index: index of the shard, shown in the banner.
    result: dict with the 'machine_id' and 'output' keys and optionally the
        'machine_tag' key ('unknown' is shown when it is missing).
    shard_exit_code: integer exit code of the shard. It is printed both as a
        decimal and as an unsigned 32 bits hexadecimal value.
  """
  # The values must be passed in the same order as the labels in the format
  # string: the machine tag first, then the machine id.
  tag = 'index %s (machine tag: %s, id: %s)' % (
      shard_index,
      result.get('machine_tag', 'unknown'),
      result['machine_id'])
  return (
      '\n'
      '================================================================\n'
      'Begin output from shard %s\n'
      '================================================================\n'
      '\n'
      '%s'
      '================================================================\n'
      'End output from shard %s.\nExit code %d (%s).\n'
      '================================================================\n') % (
          tag, result['output'] or NO_OUTPUT_FOUND, tag,
          shard_exit_code, hex(0xffffffff & shard_exit_code))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000762
763
def collect(
    url, task_name, shards, timeout, decorate,
    print_status_updates, task_output_dir):
  """Retrieves and prints the results of a Swarming task.

  Arguments:
    url: Swarming server URL, passed to get_task_keys and yield_results.
    task_name: name of the task; per-shard names are derived from it.
    shards: number of shards to collect.
    timeout: passed through to yield_results.
    decorate: if true, wrap each shard output with decorate_shard_output.
    print_status_updates: passed through to yield_results.
    task_output_dir: if set, a TaskOutputCollector is created for it.

  Returns:
    1 if results from some shards are missing or if any shard had a non-zero
    exit code, 0 otherwise.

  Raises:
    Failure: if a shard doesn't map to exactly one task key.
  """
  # Grab task keys for each shard. Order is important, used to figure out
  # shard index based on the key.
  # TODO(vadimsh): Simplify this once server support is added.
  task_keys = []
  for index in xrange(shards):
    shard_task_name = get_shard_task_name(task_name, shards, index)
    logging.info('Collecting %s', shard_task_name)
    shard_task_keys = get_task_keys(url, shard_task_name)
    if not shard_task_keys:
      raise Failure('No task keys to get results with: %s' % shard_task_name)
    if len(shard_task_keys) != 1:
      raise Failure('Expecting only one shard for a task: %s' % shard_task_name)
    task_keys.append(shard_task_keys[0])

  # Collect output files only if explicitly asked with --task-output-dir option.
  output_collector = None
  if task_output_dir:
    output_collector = TaskOutputCollector(
        task_output_dir, task_name, len(task_keys))

  seen_shards = set()
  exit_codes = []

  try:
    for index, output in yield_results(
        url, task_keys, timeout, None, print_status_updates, output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. Missing
      # exit codes are treated as a failure ('1').
      codes = map(int, (output['exit_codes'] or '1').split(','))
      shard_exit_code = next((code for code in codes if code), 0)
      exit_codes.append(shard_exit_code)

      if decorate:
        print(decorate_shard_output(index, output, shard_exit_code))
      else:
        # Mirror decorate_shard_output(): the machine tag is optional and the
        # output may be empty, neither should crash the collection.
        print(
            '%s/%s: %s' % (
                output['machine_id'],
                output.get('machine_tag', 'unknown'),
                output['exit_codes']))
        shard_output = output['output'] or NO_OUTPUT_FOUND
        print(''.join(' %s\n' % l for l in shard_output.splitlines()))
  finally:
    # Always finalize the collector, even if collection was interrupted.
    if output_collector:
      output_collector.finalize()

  if len(seen_shards) != len(task_keys):
    missing_shards = [x for x in range(len(task_keys)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return int(any(exit_codes))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000825
826
def add_filter_options(parser):
  """Adds the 'Filtering slaves' option group, stored as parser.filter_group."""
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  parser.filter_group = group
  group.add_option(
      '-d', '--dimension', default=[], action='append', nargs=2,
      dest='dimensions', metavar='FOO bar',
      help='dimension to filter on')
  parser.add_option_group(group)
834
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400835
def process_filter_options(parser, options):
  """Converts options.dimensions to a dict and requires it to be non-empty.

  Calls parser.error() when no dimension was specified.
  """
  dimensions = dict(options.dimensions)
  options.dimensions = dimensions
  if dimensions:
    return
  parser.error('Please at least specify one --dimension')
840
841
def add_sharding_options(parser):
  """Adds the 'Sharding options' group, stored as parser.sharding_group."""
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  parser.sharding_group = group
  group.add_option(
      '--shards', type='int', default=1,
      help='Number of shards to trigger and collect.')
  parser.add_option_group(group)
848
849
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  That is the isolate server options, the filter options, the 'Task
  properties' group (stored as parser.task_group) and the --profile flag.
  """
  isolateserver.add_isolate_server_options(parser, True)
  add_filter_options(parser)

  task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group = task_group
  task_group.add_option(
      '-e', '--env', default=[], action='append', nargs=2, metavar='FOO bar',
      help='Environment variables to set')
  task_group.add_option(
      '--priority', type='int', default=100,
      help='The lower value, the more important the task is')
  task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. It uniquely identifies the task. '
           'Defaults to <base_name>/<dimensions>/<isolated hash>/<timestamp> '
           'if an isolated file is provided, if a hash is provided, it '
           'defaults to <user>/<dimensions>/<isolated hash>/<timestamp>')
  task_group.add_option(
      '--deadline', type='int', default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  parser.add_option_group(task_group)

  # TODO(maruel): This is currently written in a chromium-specific way.
  profile_by_default = bool(os.environ.get('ISOLATE_DEBUG'))
  parser.group_logging.add_option(
      '--profile', action='store_true',
      default=profile_by_default,
      help='Have run_isolated.py print profiling info')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000878
879
def process_trigger_options(parser, options, args):
  """Validates the options and arguments of a trigger-like command.

  Exactly one positional argument is expected; parser.error() is called
  otherwise.
  """
  isolateserver.process_isolate_server_options(parser, options)
  has_single_arg = len(args) == 1
  if not has_single_arg:
    parser.error('Must pass one .isolated file or its hash (sha1).')
  process_filter_options(parser, options)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000885
886
def add_collect_options(parser):
  """Adds the options used to collect the results of a task.

  --timeout goes to the server group, --decorate and --print-status-updates go
  to the logging group, and --task-output-dir goes to a new 'Task output'
  group stored as parser.task_output_group.
  """
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=DEFAULT_SHARD_WAIT_TIME,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')

  logging_group = parser.group_logging
  logging_group.add_option(
      '--decorate', action='store_true', help='Decorate output')
  logging_group.add_option(
      '--print-status-updates', action='store_true',
      help='Print periodic status updates')

  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group = output_group
  output_group.add_option(
      '--task-output-dir',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains <task-output-dir>/summary.json file with '
           'a summary of task results across all shards, and per-shard '
           'directory with output files produced by a shard: '
           '<task-output-dir>/<zero-based-shard-index>/')
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000908
909
def extract_isolated_command_extra_args(args):
  """Splits |args| at the first '--' separator.

  Returns a tuple (arguments before '--', arguments after '--'). When there is
  no '--', returns (args, []).
  """
  if '--' not in args:
    return (args, [])
  separator = args.index('--')
  before = args[:separator]
  after = args[separator + 1:]
  return (before, after)
916
917
@subcommand.usage('task_name')
def CMDcollect(parser, args):
  """Retrieves results of a Swarming task.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  # Exactly one positional argument, the task name, is accepted.
  if len(args) > 1:
    parser.error('Must specify only one task name.')
  if not args:
    parser.error('Must specify one task name.')

  task_name = args[0]
  try:
    return collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +0000945
946
def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  filter_group = parser.filter_group
  filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  def has_requested_dimensions(bot_dimensions):
    # True when the bot has all the dimensions requested by the user.
    for key, value in options.dimensions:
      if key not in bot_dimensions:
        return False
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      actual = bot_dimensions[key]
      if isinstance(actual, list):
        if value not in actual:
          return False
      elif value != actual:
        return False
    return True

  service = net.get_http_service(options.swarming)
  data = service.json_request('GET', '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # A bot is dead when it was last seen longer ago than the timeout reported
  # by the server.
  timeout = datetime.timedelta(seconds=data['machine_death_timeout'])
  utcnow = datetime.datetime.utcnow()
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['id']):
    last_seen = datetime.datetime.strptime(
        machine['last_seen'], '%Y-%m-%d %H:%M:%S')
    is_dead = utcnow - last_seen > timeout
    if options.dead_only:
      skip = not is_dead
    else:
      skip = is_dead and not options.keep_dead
    if skip:
      continue

    dimensions = machine['dimensions']
    if not has_requested_dimensions(dimensions):
      continue
    print(machine['id'])
    if not options.bare:
      print(' %s' % dimensions)
  return 0
1000
1001
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  # Everything after '--' is meant for the isolated command, not this parser.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  target = args[0]
  try:
    result, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=target,
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
  except Failure as e:
    tools.report_error(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, target, e.args[0]))
    return 1

  if result:
    tools.report_error('Failed to trigger the task.')
    return result

  # Only print the name when it differs from what the user asked for.
  if task_name != options.task_name:
    print('Triggered task: %s' % task_name)

  try:
    return collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001052
1053
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive, packages it if needed and sends a
  Swarming manifest file to the Swarming server.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  try:
    result, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
    # Print the name only on success and only when it differs from the
    # requested one.
    succeeded = not result
    if succeeded and task_name != options.task_name:
      print('Triggered task: %s' % task_name)
    return result
  except Failure as e:
    tools.report_error(e)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001094
1095
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser with the --swarming server option and the auth options."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group = server_group
    # The server URL defaults to the SWARMING_SERVER environment variable.
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL', default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses the arguments; strips trailing '/' from and requires --swarming."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    swarming_url = options.swarming.rstrip('/')
    options.swarming = swarming_url
    if not swarming_url:
      self.error('--swarming is required.')
    auth.process_auth_options(self, options)
    return options, args
1116
1117
def main(args):
  """Dispatches |args| to the matching CMD* function of this module.

  Returns the value returned by the dispatcher, or 1 after reporting the error
  when an exception is raised.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    parser = OptionParserSwarming(version=__version__)
    return dispatcher.execute(parser, args)
  except Exception as e:
    tools.report_error(e)
    return 1
1125
1126
if __name__ == '__main__':
  # Set up the process environment before handing the command line to main().
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)