blob: 034363f540fcd66c6dae1afba3008720f83145ce [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Vadim Shtayurab450c602014-05-12 19:23:25 -07008__version__ = '0.4.8'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040010import datetime
Marc-Antoine Ruel5b475782014-02-14 20:57:59 -050011import getpass
maruel@chromium.org0437a732013-08-27 16:05:52 +000012import hashlib
13import json
14import logging
15import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070016import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import shutil
maruel@chromium.org0437a732013-08-27 16:05:52 +000018import subprocess
19import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070020import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000021import time
22import urllib
maruel@chromium.org0437a732013-08-27 16:05:52 +000023
24from third_party import colorama
25from third_party.depot_tools import fix_encoding
26from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000027
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050028from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040029from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000030from utils import net
maruel@chromium.org0437a732013-08-27 16:05:52 +000031from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000032from utils import tools
33from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000034
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080035import auth
maruel@chromium.org7b844a62013-09-17 13:04:59 +000036import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000037import run_isolated
38
39
# Absolute path of the directory that contains this script.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# Sub-directory with helper scripts (e.g. swarm_cleanup.py) that get bundled
# together with the task.
TOOLS_PATH = os.path.join(ROOT_DIR, 'tools')


# The default time to wait for a shard to finish running.
DEFAULT_SHARD_WAIT_TIME = 80 * 60.

# How often to print status updates to stdout in 'collect'.
STATUS_UPDATE_INTERVAL = 15 * 60.


# Placeholder text for a task that did not print anything.
NO_OUTPUT_FOUND = (
    'No output produced by the task, it may have failed to run.\n'
    '\n')
54
55
class Failure(Exception):
  """Generic failure raised by this tool, e.g. when a task cannot be found."""
59
60
class Manifest(object):
  """Represents a Swarming task manifest.

  Holds the parameters of one task request plus the commands ('tests') and
  bundled files ('data') attached to it; to_json() serializes the whole thing.
  """

  def __init__(
      self, isolate_server, namespace, isolated_hash, task_name, extra_args,
      env, dimensions, working_dir, deadline, verbose, profile,
      priority):
    """Populates a manifest object.

    Args:
      isolate_server - isolate server url.
      namespace - isolate server namespace to use.
      isolated_hash - the manifest's sha-1 that the slave is going to fetch.
      task_name - the name to give the task request.
      extra_args - additional arguments to pass to isolated command.
      env - environment variables to set.
      dimensions - dimensions to filter the task on.
      working_dir - relative working directory to start the script.
      deadline - maximum pending time before this task expires.
      verbose - if True, have the slave print more details.
      profile - if True, have the slave print more timing data.
      priority - int between 0 and 1000, lower the higher priority.
    """
    self.isolate_server = isolate_server
    self.namespace = namespace
    self.isolated_hash = isolated_hash
    self.task_name = task_name
    # Frozen as a tuple; a missing/empty value becomes an empty tuple.
    self.extra_args = tuple(extra_args) if extra_args else ()
    # Private copies, so the caller's dicts are never shared with the manifest.
    self.env = env.copy()
    self.dimensions = dimensions.copy()
    self.working_dir = working_dir
    self.deadline = deadline
    self.verbose = bool(verbose)
    self.profile = bool(profile)
    self.priority = priority
    # Commands registered through add_task(), in insertion order.
    self._tasks = []
    # [url, name] pairs registered through add_bundled_file().
    self._files = []

  def add_task(self, task_name, actions, time_out=2*60*60):
    """Appends a new task as a TestObject to the swarming manifest file.

    Tasks cannot be added once the manifest was uploaded.

    By default, command will be killed after 2 hours of execution.

    See TestObject in services/swarming/src/common/test_request_message.py for
    the valid format.
    """
    test_object = {
      'action': actions,
      'decorate_output': self.verbose,
      'test_name': task_name,
      'hard_time_out': time_out,
    }
    self._tasks.append(test_object)

  def add_bundled_file(self, file_name, file_url):
    """Appends a file to the manifest.

    File will be downloaded and extracted by the swarm bot before launching the
    task.
    """
    # The serialized entry puts the URL first, then the file name.
    entry = [file_url, file_name]
    self._files.append(entry)

  def to_json(self):
    """Exports the current configuration into a swarm-readable manifest file.

    The actual serialization format is defined as a TestCase object as described
    in services/swarming/src/common/test_request_message.py

    Returns:
      Compact JSON text (sorted keys, no whitespace around separators).
    """
    # Is a TestConfiguration.
    configuration = {
      'config_name': 'isolated',
      'deadline_to_run': self.deadline,
      'dimensions': self.dimensions,
      'min_instances': 1,
      'priority': self.priority,
    }
    request = {
      'cleanup': 'root',
      'configurations': [configuration],
      'data': self._files,
      'encoding': 'UTF-8',
      'env_vars': self.env,
      'restart_on_failure': True,
      'test_case_name': self.task_name,
      'tests': self._tasks,
      'working_dir': self.working_dir,
    }
    return json.dumps(request, sort_keys=True, separators=(',', ':'))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000151
Marc-Antoine Ruelcd629732013-12-20 15:00:42 -0500152
class TaskOutputCollector(object):
  """Fetches task output from isolate server to local disk.

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.
  The per-shard results and the lazily created Storage are guarded by an
  internal lock.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    self._lock = threading.Lock()
    # Maps shard index -> result dict of that shard.
    self._per_shard_results = {}
    # isolateserver.Storage instance, created on first use by _get_storage().
    self._storage = None

    if not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Called concurrently from multiple threads.

    Args:
      shard_index: int index of the shard, expected in [0, shard_count).
      result: result dict of the shard; its 'output' value is scanned for the
          location of output files to fetch.

    Out-of-range and duplicate shard indexes are logged and ignored.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if shard_index < 0 or shard_index >= self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary.
    isolated_files_location = extract_output_files_location(result['output'])
    if isolated_files_location:
      isolate_server, namespace, isolated_hash = isolated_files_location
      storage = self._get_storage(isolate_server, namespace)
      if storage:
        # Output files are supposed to be small and they are not reused across
        # tasks. So use MemoryCache for them instead of on-disk cache. Make
        # files writable, so that calling script can delete them.
        # 0o700 is the same value as the Python-2-only spelling 0700, but is
        # also valid syntax on Python 3.
        isolateserver.fetch_isolated(
            isolated_hash,
            storage,
            isolateserver.MemoryCache(file_mode_mask=0o700),
            os.path.join(self.task_output_dir, str(shard_index)),
            False)

  def finalize(self):
    """Writes summary.json, shutdowns underlying Storage."""
    with self._lock:
      # Write an array of shard results with None for missing shards.
      summary = {
        'task_name': self.task_name,
        'shards': [
          self._per_shard_results.get(i) for i in range(self.shard_count)
        ],
      }
      tools.write_json(
          os.path.join(self.task_output_dir, 'summary.json'),
          summary,
          False)
      if self._storage:
        self._storage.close()
        self._storage = None

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    The Storage is created on the first call and reused afterwards. Returns
    None (after logging an error) if a later call asks for a different isolate
    server or namespace than the one the Storage was created with.
    """
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
      else:
        # Shards must all use exact same isolate server and namespace.
        if self._storage.location != isolate_server:
          logging.error(
              'Task shards are using multiple isolate servers: %s and %s',
              self._storage.location, isolate_server)
          return None
        if self._storage.namespace != namespace:
          logging.error(
              'Task shards are using multiple namespaces: %s and %s',
              self._storage.namespace, namespace)
          return None
      return self._storage
251
252
def now():
  """Returns time.time(); a separate function so tests can mock it easily."""
  timestamp = time.time()
  return timestamp
256
257
def get_task_keys(swarm_base_url, task_name):
  """Returns the Swarming task key for each shards of task_name.

  Queries '<swarm_base_url>/get_matching_test_cases?name=<task_name>' and
  retries while the server answers with a 'No matching' message.

  Returns:
    The JSON-decoded server response.

  Raises:
    Failure: the request failed, or no matching task showed up after all
        attempts allowed by net.retry_loop.
  """
  key_data = urllib.urlencode([('name', task_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, key_data)
  # Same text is used for both ways of giving up.
  not_found_error = (
      'Error: Unable to find any task with the name, %s, on swarming server'
      % task_name)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    result = net.url_read(url, retry_404=True)
    if result is None:
      raise Failure(not_found_error)

    # TODO(maruel): Compare exact string.
    if 'No matching' in result:
      # Pass the argument to logging instead of pre-formatting, so the message
      # is only built when the record is actually emitted.
      logging.warning(
          'Unable to find any task with the name, %s, on swarming server',
          task_name)
      continue
    return json.loads(result)

  raise Failure(not_found_error)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000280
281
def extract_output_files_location(task_log):
  """Task log -> location of task output files to fetch.

  Looks for a JSON dict wrapped in [run_isolated_out_hack] ...
  [/run_isolated_out_hack] markers and reads its 'hash', 'namespace' and
  'storage' values.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if found is None:
    return None
  payload = found.group(1)

  def to_ascii(val):
    # Only strings are acceptable; anything else counts as a malformed payload.
    if isinstance(val, basestring):
      return val.encode('ascii')
    raise ValueError()

  try:
    data = json.loads(payload)
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash = to_ascii(data['hash'])
    namespace = to_ascii(data['namespace'])
    isolate_server = to_ascii(data['storage'])
    if not file_path.is_url(isolate_server):
      raise ValueError()
  except (KeyError, ValueError):
    logging.warning('Unexpected value of run_isolated_out_hack: %s', payload)
    return None
  return (isolate_server, namespace, isolated_hash)
318
319
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Retrieves results for a single task_key.

  Polls '<base_url>/get_result?r=<task_key>' until the response carries a
  non-empty 'output', the timeout expires or |should_stop| is set.

  Args:
    base_url: URL of the swarming server.
    shard_index: index of the shard, handed over to |output_collector|.
    task_key: key of the task to fetch results for.
    timeout: float; a falsy value (0.0) disables the deadline.
    should_stop: event object (is_set/wait) used to abort the polling.
    output_collector: optional object whose process_shard_result() is called
        with the final result.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  query = urllib.urlencode([('r', task_key)])
  result_url = '%s/get_result?%s' % (base_url, query)
  started = now()
  deadline = None
  if timeout:
    deadline = started + timeout
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      delay = min(15, 1 + (current_time - started) / 30.0)
      if deadline:
        # Never sleep past the deadline.
        delay = min(delay, deadline - current_time)
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read, since we are doing retries
    # ourselves. Do not use retry_404 so should_stop is polled more often.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)

    # Request failed. Try again.
    if response is None:
      continue

    # Got some response, ensure it is JSON dict, retry if not.
    try:
      result = json.loads(response) or {}
      if not isinstance(result, dict):
        raise ValueError()
    except (ValueError, TypeError):
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, response)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if not result.get('output'):
      continue

    # Record the result, try to fetch attached output files (if any).
    if output_collector:
      # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
      output_collector.process_shard_result(shard_index, result)
    return result

  # |should_stop| was set before an attempt could start.
  return None
maruel@chromium.org0437a732013-08-27 16:05:52 +0000384
385
def yield_results(
    swarm_base_url, task_keys, timeout, max_threads,
    print_status_updates, output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  print_status_updates, when true, makes the function print the list of shards
  still being waited for each time STATUS_UPDATE_INTERVAL elapses without a
  result.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  if max_threads:
    number_threads = min(max_threads, len(task_keys))
  else:
    number_threads = len(task_keys)
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_key):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        wrapped = results_channel.wrap_task(task_fn)
        pool.add_task(
            0, wrapped,
            swarm_base_url, shard_index, task_key, timeout,
            should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_key in enumerate(task_keys):
        enqueue_retrieve_results(shard_index, task_key)

      # Wait for all of them to finish.
      shards_remaining = range(len(task_keys))
      active_task_count = len(task_keys)
      while active_task_count:
        # Stay (None, None) if pulling the result raised something unexpected.
        shard_index, result = None, None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # Nothing arrived within the interval; tasks are still running.
          if print_status_updates:
            pending = ', '.join(map(str, shards_remaining))
            print(
                'Waiting for results from the following shards: %s' % pending)
            sys.stdout.flush()
          continue
        except Exception:
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, signal the remaining threads to stop.
      should_stop.set()
458
459
def setup_run_isolated(manifest, bundle):
  """Sets up the manifest to run an isolated task via run_isolated.py.

  Modifies |bundle| (by adding files) and |manifest| (by adding commands) in
  place. Two commands are added: 'Run Test' followed by 'Clean Up'.

  Args:
    manifest: Manifest with swarm task definition.
    bundle: ZipPackage with files that would be transfered to swarm bot.
        If None, only |manifest| is modified (useful in tests).
  """
  run_test_name = 'run_isolated.zip'
  cleanup_script_name = 'swarm_cleanup.py'

  # Add uncompressed zip here. It'll be compressed as part of the package sent
  # to Swarming server.
  if bundle and run_test_name not in bundle.files:
    zipped_runner = run_isolated.get_as_zip_package().zip_into_buffer(
        compress=False)
    bundle.add_buffer(run_test_name, zipped_runner)

  if bundle and cleanup_script_name not in bundle.files:
    cleanup_script_path = os.path.join(TOOLS_PATH, cleanup_script_name)
    bundle.add_file(cleanup_script_path, cleanup_script_name)

  # A URL is passed as an isolate server, anything else as a local directory.
  if file_path.is_url(manifest.isolate_server):
    location_flag = '--isolate-server'
  else:
    location_flag = '--indir'

  run_cmd = [
    'python', run_test_name,
    '--hash', manifest.isolated_hash,
    '--namespace', manifest.namespace,
    location_flag, manifest.isolate_server,
  ]

  if manifest.verbose or manifest.profile:
    # Have it print the profiling section.
    run_cmd.append('--verbose')

  # Pass all extra args for run_isolated.py, it will pass them to the command.
  if manifest.extra_args:
    run_cmd.append('--')
    run_cmd.extend(manifest.extra_args)

  manifest.add_task('Run Test', run_cmd)

  # Clean up
  manifest.add_task('Clean Up', ['python', cleanup_script_name])
507
508
def setup_googletest(env, shards, index):
  """Sets googletest specific environment variables.

  Returns |env| itself when there is a single shard; otherwise returns a copy
  with GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS set (as strings). The input
  dict is never modified.
  """
  if shards > 1:
    env = env.copy()
    env.update(
        GTEST_SHARD_INDEX=str(index),
        GTEST_TOTAL_SHARDS=str(shards))
  return env
516
517
def archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the CAC.

  Runs isolate.py (located next to this script) in a subprocess: the 'archive'
  command when |isolate_server| is a URL, the 'hashtable' command with
  --outdir otherwise.

  Args:
    isolate_server: isolate server URL, or a path used as --outdir.
    namespace: isolate server namespace to use.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm, passed to isolateserver.hash_file().
    verbose: int, number of '--verbose' flags to pass to isolate.py.

  Returns:
    Hash of the .isolated file on success, None if isolate.py exited with a
    non-zero code.
  """
  logging.info('archive(%s, %s, %s)', isolate_server, namespace, isolated)
  if file_path.is_url(isolate_server):
    command = 'archive'
    flag = '--isolate-server'
  else:
    command = 'hashtable'
    flag = '--outdir'

  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    command,
    flag, isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Only the command is passed: a second positional argument would be taken
  # as Popen's |bufsize|, which is not what |verbose| means.
  if subprocess.call(cmd):
    return None
  return isolateserver.hash_file(isolated, algo)
547
548
def get_shard_task_name(task_name, shards, index):
  """Returns a task name to use for a single shard of a task.

  A task with exactly one shard keeps its name; otherwise the name becomes
  '<task_name>:<shards>:<index>'.
  """
  if shards != 1:
    return '%s:%s:%s' % (task_name, shards, index)
  return task_name
554
555
def upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to isolate storage and returns raw fetch URL.

  Args:
    isolate_server: URL of an isolate server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from on success.
    None on failure.
  """
  # Swarming bot would need to be able to grab the file from the storage
  # using raw HTTP GET. Use 'default' namespace so that the raw data returned
  # to a bot is not zipped, since swarm_bot doesn't understand compressed
  # data yet. This namespace have nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  try:
    started_at = now()
    zip_item = isolateserver.BufferItem(
        bundle.zip_into_buffer(), high_priority=True)
    with isolateserver.get_storage(isolate_server, 'default') as storage:
      uploaded = storage.upload_items([zip_item])
      fetch_url = storage.get_fetch_url(zip_item)
    duration = now() - started_at
  except (IOError, OSError) as exc:
    tools.report_error('Failed to upload the zip file: %s' % exc)
    return None

  # upload_items() reports what was actually sent; an item missing from it is
  # taken to be already present on the server.
  if zip_item in uploaded:
    message = 'Upload complete, time elapsed: %f'
  else:
    message = 'Zip file already on server, time elapsed: %f'
  logging.info(message, duration)
  return fetch_url
589
590
def trigger_by_manifest(swarming, manifest):
  """Given a task manifest, triggers it for execution on swarming.

  Sends the serialized manifest as the 'request' field to '<swarming>/test'
  and checks that the reply is valid JSON.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    True on success, False on failure.
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  response = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not response:
    tools.report_error('Failed to trigger task %s' % manifest.task_name)
    return False

  # The decoded value itself is not used, only the fact that it parses.
  try:
    json.loads(response)
  except (ValueError, TypeError) as e:
    report_lines = [
      'Failed to trigger task %s' % manifest.task_name,
      'Manifest: %s' % manifest_text,
      'Bad response: %s' % response,
      str(e),
    ]
    tools.report_error('\n'.join(report_lines))
    return False
  return True
618
619
def abort_by_manifest(_swarming, _manifest):
  """Given a task manifest that was triggered, aborts its execution.

  Currently a no-op: both arguments are ignored and None is returned.
  """
  # TODO(vadimsh): Not supported by the server yet.
  return None
623
624
def trigger_task_shards(
    swarming, isolate_server, namespace, isolated_hash, task_name, extra_args,
    shards, dimensions, env, working_dir, deadline, verbose, profile, priority):
  """Triggers multiple subtasks of a sharded task.

  Builds one Manifest per shard, uploads a single zip bundle shared by all of
  them, then triggers the shards one by one, stopping at the first failure.

  Returns:
    0 if every shard was triggered, 1 otherwise (bundle upload failed or some
    shard could not be triggered).
  """
  # Collects all files that are necessary to bootstrap a task execution
  # on the bot. Usually it includes self contained run_isolated.zip and
  # a bunch of small other scripts. All heavy files are pulled
  # by run_isolated.zip. Updated in 'setup_run_isolated'.
  bundle = zip_package.ZipPackage(ROOT_DIR)

  # Make a separate Manifest for each shard, put shard index and number of
  # shards into env and subtask name.
  manifests = []
  for index in xrange(shards):
    shard_manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=get_shard_task_name(task_name, shards, index),
        extra_args=extra_args,
        dimensions=dimensions,
        env=setup_googletest(env, shards, index),
        working_dir=working_dir,
        deadline=deadline,
        verbose=verbose,
        profile=profile,
        priority=priority)
    setup_run_isolated(shard_manifest, bundle)
    manifests.append(shard_manifest)

  # Upload zip bundle file to get its URL.
  bundle_url = upload_zip_bundle(isolate_server, bundle)
  if not bundle_url:
    return 1

  # Attach that file to all manifests.
  for shard_manifest in manifests:
    shard_manifest.add_bundled_file('swarm_data.zip', bundle_url)

  # Trigger all the subtasks, stop at the first one that fails.
  triggered = []
  for shard_manifest in manifests:
    if not trigger_by_manifest(swarming, shard_manifest):
      break
    triggered.append(shard_manifest)

  if len(triggered) == len(manifests):
    return 0

  # Some shards weren't triggered. Abort everything.
  if triggered:
    print >> sys.stderr, 'Not all shards were triggered'
    for shard_manifest in triggered:
      abort_by_manifest(swarming, shard_manifest)
  return 1
681
682
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Resolves |arg| to the hash of an isolated file, archiving it if needed.

  |arg| is treated as a path to archive when it ends with '.isolated';
  otherwise it must be a valid hash for |algo|.

  Returns:
    Tuple (file hash, is_file). The hash is None on failure, in which case the
    error was already reported. is_file is True when |arg| was handled as a
    .isolated file and False when it was handled as a hash.
  """
  if not arg.endswith('.isolated'):
    if isolateserver.is_valid_hash(arg, algo):
      return arg, False
    tools.report_error('Invalid hash %s' % arg)
    return None, False

  archived_hash = archive(isolate_server, namespace, arg, algo, verbose)
  if archived_hash:
    return archived_hash, True
  tools.report_error('Archival failure %s' % arg)
  return None, True
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500700
701
def trigger(
    swarming,
    isolate_server,
    namespace,
    file_hash_or_isolated,
    task_name,
    extra_args,
    shards,
    dimensions,
    env,
    working_dir,
    deadline,
    verbose,
    profile,
    priority):
  """Resolves the isolated hash, names the task and triggers all its shards.

  Returns:
    Tuple (exit code, task name). (1, '') is returned when
    |file_hash_or_isolated| could not be resolved to a hash; otherwise the exit
    code is the one returned by trigger_task_shards.
  """
  file_hash, is_file = isolated_to_hash(
      isolate_server, namespace, file_hash_or_isolated, hashlib.sha1, verbose)
  if not file_hash:
    return 1, ''

  if not task_name:
    # No explicit name: build one. It starts with the base name of the
    # .isolated file when one was passed, otherwise with the user name as an
    # approximation.
    if is_file:
      prefix = os.path.splitext(os.path.basename(file_hash_or_isolated))[0]
    else:
      prefix = getpass.getuser()
    dimensions_part = '_'.join(
        '%s=%s' % item for item in sorted(dimensions.iteritems()))
    timestamp = now() * 1000
    task_name = '%s/%s/%s/%d' % (
        prefix, dimensions_part, file_hash, timestamp)

  exit_code = trigger_task_shards(
      swarming=swarming,
      isolate_server=isolate_server,
      namespace=namespace,
      isolated_hash=file_hash,
      task_name=task_name,
      extra_args=extra_args,
      shards=shards,
      dimensions=dimensions,
      env=env,
      working_dir=working_dir,
      deadline=deadline,
      verbose=verbose,
      profile=profile,
      priority=priority)
  return exit_code, task_name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000751
752
def decorate_shard_output(shard_index, result, shard_exit_code):
  """Returns wrapped output for swarming task shard.

  Args:
    shard_index: index of the shard, shown in the header and the footer.
    result: dict with the shard results. 'machine_id' and 'output' keys are
        required, 'machine_tag' is optional and shown as 'unknown' if missing.
    shard_exit_code: integer exit code shown in the footer.

  Returns:
    The shard output surrounded by a header and a footer. NO_OUTPUT_FOUND is
    used in place of the output when it is empty or None.
  """
  # The values must follow the order of the labels in the format string: the
  # machine tag comes first, then the machine id.
  tag = 'index %s (machine tag: %s, id: %s)' % (
      shard_index,
      result.get('machine_tag', 'unknown'),
      result['machine_id'])
  return (
    '\n'
    '================================================================\n'
    'Begin output from shard %s\n'
    '================================================================\n'
    '\n'
    '%s'
    '================================================================\n'
    'End output from shard %s. Return %d\n'
    '================================================================\n'
    ) % (tag, result['output'] or NO_OUTPUT_FOUND, tag, shard_exit_code)
770
771
def collect(
    url, task_name, shards, timeout, decorate,
    print_status_updates, task_output_dir):
  """Retrieves results of a Swarming task.

  Args:
    url: Swarming server URL, passed to get_task_keys and yield_results.
    task_name: base name of the task; the name of each shard is derived from
        it with get_shard_task_name.
    shards: number of shards to collect results for.
    timeout: passed through to yield_results.
    decorate: if true, each shard output is printed wrapped by
        decorate_shard_output, otherwise a compact indented form is printed.
    print_status_updates: passed through to yield_results.
    task_output_dir: if set, a TaskOutputCollector is created for it, handed to
        yield_results and finalized at the end.

  Returns:
    The first non-zero shard exit code seen, where the exit code of a shard is
    the largest of its exit codes. 1 if all the shards seen succeeded but some
    shards are missing, or if no result was seen at all. 0 otherwise.

  Raises:
    Failure: a shard doesn't have exactly one task key.
  """
  # Grab task keys for each shard. Order is important, used to figure out
  # shard index based on the key.
  # TODO(vadimsh): Simplify this once server support is added.
  task_keys = []
  for index in xrange(shards):
    shard_task_name = get_shard_task_name(task_name, shards, index)
    logging.info('Collecting %s', shard_task_name)
    shard_task_keys = get_task_keys(url, shard_task_name)
    if not shard_task_keys:
      raise Failure('No task keys to get results with: %s' % shard_task_name)
    if len(shard_task_keys) != 1:
      raise Failure('Expecting only one shard for a task: %s' % shard_task_name)
    task_keys.append(shard_task_keys[0])

  # Collect output files only if explicitly asked with --task-output-dir option.
  if task_output_dir:
    output_collector = TaskOutputCollector(
        task_output_dir, task_name, len(task_keys))
  else:
    output_collector = None

  exit_code = None
  seen_shards = set()

  try:
    for index, output in yield_results(
        url, task_keys, timeout, None, print_status_updates, output_collector):
      seen_shards.add(index)
      # Missing exit codes are considered a failure.
      shard_exit_codes = (output['exit_codes'] or '1').split(',')
      shard_exit_code = max(int(i) for i in shard_exit_codes)
      if decorate:
        print(decorate_shard_output(index, output, shard_exit_code))
      else:
        print(
            '%s/%s: %s' % (
                output['machine_id'],
                output['machine_tag'],
                output['exit_codes']))
        # The output may be empty or None; fall back to NO_OUTPUT_FOUND as
        # decorate_shard_output does instead of failing on None.splitlines().
        shard_output = output['output'] or NO_OUTPUT_FOUND
        print(''.join(' %s\n' % l for l in shard_output.splitlines()))
      # Keep the first failure.
      exit_code = exit_code or shard_exit_code
  finally:
    # Finalize even on failure so the partial results are not lost.
    if output_collector:
      output_collector.finalize()

  if len(seen_shards) != len(task_keys):
    missing_shards = [x for x in range(len(task_keys)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    exit_code = exit_code or 1

  # exit_code is still None when no result was seen at all.
  return exit_code if exit_code is not None else 1
maruel@chromium.org0437a732013-08-27 16:05:52 +0000827
828
def add_filter_options(parser):
  """Adds the 'Filtering slaves' option group to |parser|.

  The group holds the repeatable -d/--dimension option, which takes two values
  and appends them to options.dimensions. The group is kept as
  parser.filter_group.
  """
  filter_group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  parser.filter_group = filter_group
  filter_group.add_option(
      '-d', '--dimension',
      dest='dimensions',
      action='append',
      nargs=2,
      default=[],
      metavar='FOO bar',
      help='dimension to filter on')
  parser.add_option_group(filter_group)
836
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400837
def process_filter_options(parser, options):
  """Converts options.dimensions to a dict and requires it to be non-empty.

  parser.error() is called when no dimension was specified.
  """
  dimensions = dict(options.dimensions)
  options.dimensions = dimensions
  if dimensions:
    return
  parser.error('Please at least specify one --dimension')
842
843
def add_sharding_options(parser):
  """Adds the 'Sharding options' group, with --shards, to |parser|.

  The group is kept as parser.sharding_group.
  """
  sharding_group = tools.optparse.OptionGroup(parser, 'Sharding options')
  parser.sharding_group = sharding_group
  sharding_group.add_option(
      '--shards',
      type='int',
      default=1,
      help='Number of shards to trigger and collect.')
  parser.add_option_group(sharding_group)
850
851
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  This covers the isolate server options, the filtering options, a
  'Task properties' group kept as parser.task_group, and --profile in the
  logging group.
  """
  isolateserver.add_isolate_server_options(parser, True)
  add_filter_options(parser)

  task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group = task_group
  task_group.add_option(
      '-w', '--working-dir',
      default='swarm_tests',
      help='Working directory on the swarming slave side. default: %default.')
  # Hidden spelling with an underscore.
  task_group.add_option('--working_dir', help=tools.optparse.SUPPRESS_HELP)
  task_group.add_option(
      '-e', '--env',
      action='append',
      nargs=2,
      default=[],
      metavar='FOO bar',
      help='Environment variables to set')
  task_group.add_option(
      '--priority',
      type='int',
      default=100,
      help='The lower value, the more important the task is')
  task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. It uniquely identifies the task. '
           'Defaults to <base_name>/<dimensions>/<isolated hash>/<timestamp> '
           'if an isolated file is provided, if a hash is provided, it '
           'defaults to <user>/<dimensions>/<isolated hash>/<timestamp>')
  task_group.add_option(
      '--deadline',
      type='int',
      default=6 * 60 * 60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  parser.add_option_group(task_group)

  # TODO(maruel): This is currently written in a chromium-specific way.
  # Profiling is enabled by default when ISOLATE_DEBUG is set to a non-empty
  # value in the environment.
  profile_default = bool(os.environ.get('ISOLATE_DEBUG'))
  parser.group_logging.add_option(
      '--profile',
      action='store_true',
      default=profile_default,
      help='Have run_isolated.py print profiling info')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000885
886
def process_trigger_options(parser, options, args):
  """Validates the options and arguments of a command that triggers a task.

  Processes the isolate server options, requires exactly one positional
  argument and then processes the filtering options.
  """
  isolateserver.process_isolate_server_options(parser, options)
  positional_count = len(args)
  if positional_count != 1:
    parser.error('Must pass one .isolated file or its hash (sha1).')
  process_filter_options(parser, options)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000892
893
def add_collect_options(parser):
  """Adds the options used to collect the results of a task.

  --timeout goes into parser.server_group, --decorate and
  --print-status-updates into parser.group_logging, and --task-output-dir into
  a new 'Task output' group kept as parser.task_output_group.
  """
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=DEFAULT_SHARD_WAIT_TIME,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')

  logging_group = parser.group_logging
  logging_group.add_option(
      '--decorate', action='store_true', help='Decorate output')
  logging_group.add_option(
      '--print-status-updates',
      action='store_true',
      help='Print periodic status updates')

  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group = output_group
  output_group.add_option(
      '--task-output-dir',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains <task-output-dir>/summary.json file with '
           'a summary of task results across all shards, and per-shard '
           'directory with output files produced by a shard: '
           '<task-output-dir>/<zero-based-shard-index>/')
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000915
916
def extract_isolated_command_extra_args(args):
  """Splits |args| at the first '--' separator.

  Returns:
    Tuple (arguments before the separator, arguments after the separator). The
    separator itself is dropped. Without any separator, returns (args, []).
  """
  if '--' not in args:
    return (args, [])
  separator = args.index('--')
  before = args[:separator]
  after = args[separator + 1:]
  return (before, after)
923
924
@subcommand.usage('task_name')
def CMDcollect(parser, args):
  """Retrieves results of a Swarming task.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)

  # Exactly one positional argument, the task name, is expected.
  task_count = len(args)
  if task_count == 0:
    parser.error('Must specify one task name.')
  elif task_count > 1:
    parser.error('Must specify only one task name.')

  try:
    exit_code = collect(
        options.swarming,
        args[0],
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    exit_code = 1
  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000952
953
def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  filter_group = parser.filter_group
  filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')
  service = net.get_http_service(options.swarming)
  data = service.json_request('GET', '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  def has_requested_dimensions(bot_dimensions):
    """Returns True if the bot has all the dimensions requested by the user."""
    for key, value in options.dimensions:
      if key not in bot_dimensions:
        return False
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      bot_value = bot_dimensions[key]
      if isinstance(bot_value, list):
        if value not in bot_value:
          return False
      elif value != bot_value:
        return False
    return True

  # A bot is dead when it was last seen more than 'machine_death_timeout'
  # seconds ago.
  death_timeout = datetime.timedelta(seconds=data['machine_death_timeout'])
  utcnow = datetime.datetime.utcnow()
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['tag']):
    last_seen = datetime.datetime.strptime(
        machine['last_seen'], '%Y-%m-%d %H:%M:%S')
    is_dead = utcnow - last_seen > death_timeout
    if options.dead_only:
      selected = is_dead
    else:
      selected = options.keep_dead or not is_dead
    if not selected:
      continue

    dimensions = machine['dimensions']
    if not has_requested_dimensions(dimensions):
      continue
    print(machine['tag'])
    if not options.bare:
      print(' %s' % dimensions)
  return 0
1007
1008
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  # Everything after '--' is for the isolated command, not for this tool, so
  # it is split off before the options are parsed.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  # Step 1: trigger.
  try:
    result, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        working_dir=options.working_dir,
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
  except Failure as e:
    details = (options.task_name, args[0], e.args[0])
    tools.report_error('Failed to trigger %s(%s): %s' % details)
    return 1
  if result:
    tools.report_error('Failed to trigger the task.')
    return result
  # Print the name only when it differs from the one the user asked for.
  if task_name != options.task_name:
    print('Triggered task: %s' % task_name)

  # Step 2: collect.
  try:
    exit_code = collect(
        options.swarming,
        task_name,
        options.shards,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_output_dir)
  except Failure as e:
    tools.report_error(e)
    exit_code = 1
  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +00001060
1061
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive, packages it if needed and sends a
  Swarming manifest file to the Swarming server.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  # Split the arguments of the isolated command off before parsing the options.
  args, isolated_cmd_args = extract_isolated_command_extra_args(args)
  options, args = parser.parse_args(args)
  process_trigger_options(parser, options, args)

  try:
    result, task_name = trigger(
        swarming=options.swarming,
        isolate_server=options.isolate_server or options.indir,
        namespace=options.namespace,
        file_hash_or_isolated=args[0],
        task_name=options.task_name,
        extra_args=isolated_cmd_args,
        shards=options.shards,
        dimensions=options.dimensions,
        env=dict(options.env),
        working_dir=options.working_dir,
        deadline=options.deadline,
        verbose=options.verbose,
        profile=options.profile,
        priority=options.priority)
  except Failure as e:
    tools.report_error(e)
    return 1

  # On success, print the task name when it differs from the requested one.
  if not result and task_name != options.task_name:
    print('Triggered task: %s' % task_name)
  return result
maruel@chromium.org0437a732013-08-27 16:05:52 +00001103
1104
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser adding the Swarming server and authentication options."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    # The server defaults to the SWARMING_SERVER environment variable.
    default_server = os.environ.get('SWARMING_SERVER', '')
    server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group = server_group
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL',
        default=default_server,
        help='Swarming server to use')
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses the arguments, then validates the server and auth options.

    Trailing slashes are stripped from options.swarming and self.error() is
    called if the result is empty.
    """
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    server = options.swarming.rstrip('/')
    options.swarming = server
    if not server:
      self.error('--swarming is required.')
    auth.process_auth_options(self, options)
    return options, args
1125
1126
def main(args):
  """Runs the subcommand selected by |args| and returns its exit code.

  Any exception is reported with tools.report_error() and turned into an exit
  code of 1.
  """
  dispatcher = subcommand.CommandDispatcher(__name__)
  try:
    parser = OptionParserSwarming(version=__version__)
    exit_code = dispatcher.execute(parser, args)
  except Exception as e:
    tools.report_error(e)
    exit_code = 1
  return exit_code
1134
1135
# Script entry point, only run when the file is executed directly.
if __name__ == '__main__':
  # One-time process setup done before any command runs.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  # The program name is dropped from the arguments; the value returned by
  # main() becomes the exit code of the process.
  sys.exit(main(sys.argv[1:]))
1140 sys.exit(main(sys.argv[1:]))