blob: 05dde3fdfe85c1a4e144c50039fa21885f304617 [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Marc-Antoine Ruel185ded42015-01-28 20:49:18 -05008__version__ = '0.6.1'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050010import collections
maruel@chromium.org0437a732013-08-27 16:05:52 +000011import json
12import logging
13import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070014import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000015import shutil
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040016import StringIO
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import subprocess
18import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070019import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000020import time
21import urllib
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040022import urlparse
23import zipfile
maruel@chromium.org0437a732013-08-27 16:05:52 +000024
25from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000028
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050029from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040030from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
maruel@chromium.org0437a732013-08-27 16:05:52 +000033from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000034from utils import tools
35from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000036
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040038import isolated_format
maruel@chromium.org7b844a62013-09-17 13:04:59 +000039import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000040import run_isolated
41
42
43ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050044
45
class Failure(Exception):
  """Generic failure raised by swarming client operations."""
49
50
51### Isolated file handling.
52
53
def isolated_upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to Isolate Server and returns raw fetch URL.

  Args:
    isolate_server: URL of an Isolate Server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from.
  """
  # The swarming bot fetches this file with a plain HTTPS GET, so upload into
  # the 'default' namespace: raw data returned to a bot is not zipped, since
  # the swarming_bot doesn't understand compressed data. This namespace is
  # unrelated to the |namespace| passed to run_isolated.py, which is used to
  # store files for the isolated task itself.
  logging.info('Zipping up and uploading files...')
  started = time.time()
  item = isolateserver.BufferItem(bundle.zip_into_buffer())
  with isolateserver.get_storage(isolate_server, 'default') as storage:
    pushed = storage.upload_items([item])
    fetch_url = storage.get_fetch_url(item)
  took = time.time() - started
  if item in pushed:
    logging.info('Upload complete, time elapsed: %f', took)
  else:
    logging.info('Zip file already on server, time elapsed: %f', took)
  return fetch_url
81
82
def isolated_get_data(isolate_server):
  """Returns the 'data' section with all files necessary to bootstrap a task
  execution running an isolated task.

  It's mainly zipping run_isolated.zip over and over again.
  TODO(maruel): Get rid of this with.
  https://code.google.com/p/swarming/issues/detail?id=173
  """
  pkg = zip_package.ZipPackage(ROOT_DIR)
  # Nest an uncompressed run_isolated.zip inside the outer bundle.
  inner = run_isolated.get_as_zip_package().zip_into_buffer(compress=False)
  pkg.add_buffer('run_isolated.zip', inner)
  return [(isolated_upload_zip_bundle(isolate_server, pkg), 'swarm_data.zip')]
97
98
def isolated_get_run_commands(
    isolate_server, namespace, isolated_hash, extra_args, verbose):
  """Returns the 'commands' to run an isolated task via run_isolated.zip.

  Returns:
    commands list to be added to the request.
  """
  cmd = [
    'python', 'run_isolated.zip',
    '--isolated', isolated_hash,
    '--isolate-server', isolate_server,
    '--namespace', namespace,
  ]
  if verbose:
    cmd += ['--verbose']
  if extra_args:
    # Everything after '--' is forwarded by run_isolated.py to the command.
    cmd += ['--'] + list(extra_args)
  return cmd
119
120
def isolated_archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the Isolate Server.

  Args:
    isolate_server: URL of the Isolate Server to upload to.
    namespace: namespace to use on the Isolate Server.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm instance used to hash the archived file.
    verbose: verbosity level; each level adds one '--verbose' flag.

  Returns:
    Hash of the archived .isolated file, or None if the archival failed.
  """
  logging.info(
      'isolated_archive(%s, %s, %s)', isolate_server, namespace, isolated)
  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    'archive',
    '--isolate-server', isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Bug fix: this used to be subprocess.call(cmd, verbose), which passed
  # |verbose| as the positional 'bufsize' argument of subprocess.call.
  if subprocess.call(cmd):
    return None
  return isolated_format.hash_file(isolated, algo)
139
140
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Archives a .isolated file if needed.

  Returns the file hash to trigger and a bool specifying if it was a file (True)
  or a hash (False).
  """
  if arg.endswith('.isolated'):
    # A file path was given: archive it first, then trigger by hash.
    digest = isolated_archive(isolate_server, namespace, arg, algo, verbose)
    if not digest:
      on_error.report('Archival failure %s' % arg)
      return None, True
    return digest, True
  if isolated_format.is_valid_hash(arg, algo):
    return arg, False
  on_error.report('Invalid hash %s' % arg)
  return None, False
158
159
def isolated_handle_options(options, args):
  """Handles '--isolated <isolated>', '<isolated>' and '-- <args...>' arguments.

  Args:
    options: parsed option object; reads .isolated, .isolate_server,
        .namespace, .verbose, .task_name, .user, .dimensions and may set
        .isolated and .task_name as side effects.
    args: remaining positional arguments.

  Returns:
    tuple(command, data).

  Raises:
    ValueError: when the arguments can not be interpreted or the bootstrap
        zip can not be uploaded.
  """
  isolated_cmd_args = []
  # Whether args[0] was a .isolated file path (vs a hash); used below to derive
  # a default task name. Bug fix: must be initialized here, otherwise the path
  # where options.isolated is already set and |args| is empty hits an
  # UnboundLocalError when computing the default task name.
  is_file = False
  if not options.isolated:
    if '--' in args:
      index = args.index('--')
      isolated_cmd_args = args[index+1:]
      args = args[:index]
    else:
      # optparse eats '--' sometimes.
      isolated_cmd_args = args[1:]
      args = args[:1]
    if len(args) != 1:
      raise ValueError(
          'Use --isolated, --raw-cmd or \'--\' to pass arguments to the called '
          'process.')
    # Old code. To be removed eventually.
    options.isolated, is_file = isolated_to_hash(
        options.isolate_server, options.namespace, args[0],
        isolated_format.get_hash_algo(options.namespace), options.verbose)
    if not options.isolated:
      raise ValueError('Invalid argument %s' % args[0])
  elif args:
    if '--' in args:
      index = args.index('--')
      isolated_cmd_args = args[index+1:]
      if index != 0:
        raise ValueError('Unexpected arguments.')
    else:
      # optparse eats '--' sometimes.
      isolated_cmd_args = args

  command = isolated_get_run_commands(
      options.isolate_server, options.namespace, options.isolated,
      isolated_cmd_args, options.verbose)

  # If a file name was passed, use its base name of the isolated hash.
  # Otherwise, use user name as an approximation of a task name.
  if not options.task_name:
    if is_file:
      key = os.path.splitext(os.path.basename(args[0]))[0]
    else:
      key = options.user
    options.task_name = u'%s/%s/%s' % (
        key,
        '_'.join(
          '%s=%s' % (k, v)
          for k, v in sorted(options.dimensions.iteritems())),
        options.isolated)

  try:
    data = isolated_get_data(options.isolate_server)
  except (IOError, OSError):
    on_error.report('Failed to upload the zip file')
    raise ValueError('Failed to upload the zip file')

  return command, data
222
223
224### Triggering.
225
226
# Full description of a single Swarming task to trigger; converted to the raw
# JSON request dict by task_request_to_raw_request().
TaskRequest = collections.namedtuple(
    'TaskRequest',
    'command data dimensions env expiration hard_timeout idempotent '
    'io_timeout name priority tags user verbose')
244
245
def task_request_to_raw_request(task_request):
  """Returns the json dict expected by the Swarming server for new request.

  This is for the v1 client Swarming API.
  """
  properties = {
    'commands': [task_request.command],
    'data': task_request.data,
    'dimensions': task_request.dimensions,
    'env': task_request.env,
    'execution_timeout_secs': task_request.hard_timeout,
    'io_timeout_secs': task_request.io_timeout,
    'idempotent': task_request.idempotent,
  }
  return {
    'name': task_request.name,
    # Propagates the parent task's id when this trigger runs inside a task.
    'parent_task_id': os.environ.get('SWARMING_TASK_ID', ''),
    'priority': task_request.priority,
    'properties': properties,
    'scheduling_expiration_secs': task_request.expiration,
    'tags': task_request.tags,
    'user': task_request.user,
  }
268
269
def swarming_handshake(swarming):
  """Initiates the connection to the Swarming server.

  Returns the XSRF token to use for subsequent requests, or None when the
  handshake failed.
  """
  response = net.url_read_json(
      swarming + '/swarming/api/v1/client/handshake',
      headers={'X-XSRF-Token-Request': '1'},
      data={})
  if not response:
    logging.error('Failed to handshake with server')
    return None
  logging.info('Connected to server version: %s', response['server_version'])
  return response['xsrf_token']
282
283
def swarming_trigger(swarming, raw_request, xsrf_token):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])
  result = net.url_read_json(
      swarming + '/swarming/api/v1/client/request',
      data=raw_request,
      headers={'X-XSRF-Token': xsrf_token})
  if result:
    return result
  on_error.report('Failed to trigger task %s' % raw_request['name'])
  return None
309
310
def setup_googletest(env, shards, index):
  """Returns |env| extended with googletest sharding variables.

  With a single shard the environment is returned unmodified; otherwise a
  copy is made so the caller's dict is never mutated.
  """
  if shards <= 1:
    return env
  sharded = env.copy()
  sharded['GTEST_SHARD_INDEX'] = str(index)
  sharded['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded
318
319
def trigger_task_shards(swarming, task_request, shards):
  """Triggers one or many subtasks of a sharded task.

  Args:
    swarming: URL of the Swarming server.
    task_request: TaskRequest instance describing the task to run.
    shards: number of shards to trigger; when > 1, each shard gets googletest
        sharding environment variables and a ':<index>:<shards>' name suffix.

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    None in case of failure.
  """
  def convert(index):
    # Specializes the shared TaskRequest for shard |index| and converts it to
    # the raw dict format expected by the server.
    req = task_request
    if shards > 1:
      req = req._replace(
          env=setup_googletest(req.env, shards, index),
          name='%s:%s:%s' % (req.name, index, shards))
    return task_request_to_raw_request(req)

  requests = [convert(index) for index in xrange(shards)]
  xsrf_token = swarming_handshake(swarming)
  if not xsrf_token:
    return None
  tasks = {}
  priority_warning = False
  for index, request in enumerate(requests):
    task = swarming_trigger(swarming, request, xsrf_token)
    if not task:
      break
    logging.info('Request result: %s', task)
    # Warn only once if the server assigned a different priority than the one
    # requested.
    if (not priority_warning and
        task['request']['priority'] != task_request.priority):
      priority_warning = True
      print >> sys.stderr, (
          'Priority was reset to %s' % task['request']['priority'])
    tasks[request['name']] = {
      'shard_index': index,
      'task_id': task['task_id'],
      'view_url': '%s/user/task/%s' % (swarming, task['task_id']),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(requests):
    if tasks:
      print >> sys.stderr, 'Only %d shard(s) out of %d were triggered' % (
          len(tasks), len(requests))
    for task_dict in tasks.itervalues():
      abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
367
368
369### Collection.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000370
371
# How often to print status updates to stdout in 'collect', in seconds
# (15 minutes).
STATUS_UPDATE_INTERVAL = 15 * 60.
374
Marc-Antoine Ruel2f6581a2014-10-03 11:09:53 -0400375
class State(object):
  """States in which a task can be.

  WARNING: Copy-pasted from appengine/swarming/server/task_result.py. These
  values are part of the API so if they change, the API changed.

  It's in fact an enum. Values should be in decreasing order of importance.
  """
  RUNNING = 0x10
  PENDING = 0x20
  EXPIRED = 0x30
  TIMED_OUT = 0x40
  BOT_DIED = 0x50
  CANCELED = 0x60
  COMPLETED = 0x70

  # Groupings used by the rest of the client to classify a task's state.
  STATES = (RUNNING, PENDING, EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_RUNNING = (RUNNING, PENDING)
  STATES_NOT_RUNNING = (EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_DONE = (TIMED_OUT, COMPLETED)
  STATES_ABANDONED = (EXPIRED, BOT_DIED, CANCELED)

  # Human readable names, keyed by state value.
  _NAMES = {
    RUNNING: 'Running',
    PENDING: 'Pending',
    EXPIRED: 'Expired',
    TIMED_OUT: 'Execution timed out',
    BOT_DIED: 'Bot died',
    CANCELED: 'User canceled',
    COMPLETED: 'Completed',
  }

  @classmethod
  def to_string(cls, state):
    """Returns a user-readable string representing a State."""
    name = cls._NAMES.get(state)
    if name is None:
      raise ValueError('Invalid state %s' % state)
    return name
maruel@chromium.org0437a732013-08-27 16:05:52 +0000414
415
class TaskOutputCollector(object):
  """Assembles task execution summary (for --task-summary-json output).

  Optionally fetches task outputs from isolate server to local disk (used when
  --task-output-dir is passed).

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: (optional) local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # Protects _per_shard_results and _storage; methods are called from
    # multiple 'retrieve_results' threads concurrently.
    self._lock = threading.Lock()
    # Maps shard_index -> result dict, filled by process_shard_result.
    self._per_shard_results = {}
    # Lazily created isolateserver.Storage, shared across all shards.
    self._storage = None

    if self.task_output_dir and not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Modifies |result| in place.

    Called concurrently from multiple threads.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if shard_index < 0 or shard_index >= self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Scan all outputs of the shard for the embedded isolated-out marker; at
    # most one output may carry it.
    assert not 'isolated_out' in result
    result['isolated_out'] = None
    for output in result['outputs']:
      isolated_files_location = extract_output_files_location(output)
      if isolated_files_location:
        if result['isolated_out']:
          raise ValueError('Unexpected two task with output')
        result['isolated_out'] = isolated_files_location

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary.
    if self.task_output_dir and result['isolated_out']:
      storage = self._get_storage(
          result['isolated_out']['server'],
          result['isolated_out']['namespace'])
      if storage:
        # Output files are supposed to be small and they are not reused across
        # tasks. So use MemoryCache for them instead of on-disk cache. Make
        # files writable, so that calling script can delete them.
        isolateserver.fetch_isolated(
            result['isolated_out']['hash'],
            storage,
            isolateserver.MemoryCache(file_mode_mask=0700),
            os.path.join(self.task_output_dir, str(shard_index)),
            False)

  def finalize(self):
    """Assembles and returns task summary JSON, shutdowns underlying Storage."""
    with self._lock:
      # Write an array of shard results with None for missing shards.
      summary = {
        'shards': [
          self._per_shard_results.get(i) for i in xrange(self.shard_count)
        ],
      }
      # Write summary.json to task_output_dir as well.
      if self.task_output_dir:
        tools.write_json(
            os.path.join(self.task_output_dir, 'summary.json'),
            summary,
            False)
      if self._storage:
        self._storage.close()
        self._storage = None
      return summary

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files."""
    # Only needed when fetching files to disk.
    assert self.task_output_dir
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
      else:
        # Shards must all use exact same isolate server and namespace.
        if self._storage.location != isolate_server:
          logging.error(
              'Task shards are using multiple isolate servers: %s and %s',
              self._storage.location, isolate_server)
          return None
        if self._storage.namespace != namespace:
          logging.error(
              'Task shards are using multiple namespaces: %s and %s',
              self._storage.namespace, namespace)
          return None
    return self._storage
531
532
def extract_output_files_location(task_log):
  """Parses a task log for the location of the task's output files.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Dict with 'hash', 'namespace', 'server' and 'view_url' keys on success.
    None if information is missing or can not be parsed.
  """
  if not task_log:
    return None
  match = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not match:
    return None

  def as_ascii(value):
    # JSON decodes strings as unicode; the rest of the code expects str.
    if not isinstance(value, basestring):
      raise ValueError()
    return value.encode('ascii')

  try:
    blob = json.loads(match.group(1))
    if not isinstance(blob, dict):
      raise ValueError()
    digest = as_ascii(blob['hash'])
    namespace = as_ascii(blob['namespace'])
    server = as_ascii(blob['storage'])
    if not file_path.is_url(server):
      raise ValueError()
    return {
      'hash': digest,
      'namespace': namespace,
      'server': server,
      'view_url': '%s/browse?%s' % (server, urllib.urlencode(
          [('namespace', namespace), ('hash', digest)])),
    }
  except (KeyError, ValueError):
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', match.group(1))
    return None
578
579
def now():
  """Returns the current time in epoch seconds; exists so tests can mock it."""
  return time.time()
583
584
def retrieve_results(
    base_url, shard_index, task_id, timeout, should_stop, output_collector):
  """Retrieves results for a single task ID.

  Polls the server until the task reaches a non-running state, the deadline
  expires, or |should_stop| is set.

  Args:
    base_url: URL of the Swarming server; used to build the polling URLs.
    shard_index: index passed through to output_collector when recording.
    timeout: float, seconds to wait before giving up; falsy means no deadline.
    should_stop: event-like object with is_set() and wait(); aborts polling.
    output_collector: optional TaskOutputCollector-like object; its
        process_shard_result() is called with the final result.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  result_url = '%s/swarming/api/v1/client/task/%s' % (base_url, task_id)
  output_url = '%s/swarming/api/v1/client/task/%s/output/all' % (
      base_url, task_id)
  started = now()
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read_json, since we are doing retries
    # ourselves.
    # TODO(maruel): We'd need to know if it's a 404 and not retry at all.
    result = net.url_read_json(result_url, retry_50x=False)
    if not result:
      continue
    if result['state'] in State.STATES_NOT_RUNNING:
      # Task is done; fetch its combined output before returning.
      out = net.url_read_json(output_url)
      result['outputs'] = (out or {}).get('outputs', [])
      if not result['outputs']:
        logging.error('No output found for task %s', task_id)
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
maruel@chromium.org0437a732013-08-27 16:05:52 +0000639
640
def yield_results(
    swarm_base_url, task_ids, timeout, max_threads, print_status_updates,
    output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  # One polling thread per task id, unless explicitly capped by max_threads.
  number_threads = (
      min(max_threads, len(task_ids)) if max_threads else len(task_ids))
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_id):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn), swarm_base_url, shard_index,
            task_id, timeout, should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_id in enumerate(task_ids):
        enqueue_retrieve_results(shard_index, task_id)

      # Wait for all of them to finish.
      shards_remaining = range(len(task_ids))
      active_task_count = len(task_ids)
      while active_task_count:
        shard_index, result = None, None
        try:
          # Blocks until some shard completes, or prints a status update every
          # STATUS_UPDATE_INTERVAL while waiting.
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
712
713
def decorate_shard_output(
    swarming, shard_index, result, shard_exit_code, shard_duration):
  """Returns wrapped output for swarming task shard."""
  url = '%s/user/task/%s' % (swarming, result['id'])
  header_text = 'Shard %d %s' % (shard_index, url)
  footer_text = 'End of shard %d Duration: %.1fs Bot: %s Exit code %s' % (
      shard_index, shard_duration, result['bot_id'], shard_exit_code)

  # Box both banner lines to the width of the longer one.
  width = max(len(header_text), len(footer_text))
  border = '+-%s-+\n' % ('-' * width)
  body = '\n'.join(o for o in result['outputs'] if o).rstrip() + '\n'
  return ''.join([
      border, '| %s |\n' % header_text.ljust(width), border,
      body,
      border, '| %s |\n' % footer_text.ljust(width), border[:-1]])
maruel@chromium.org0437a732013-08-27 16:05:52 +0000731
732
def collect(
    swarming, task_name, task_ids, timeout, decorate, print_status_updates,
    task_summary_json, task_output_dir):
  """Retrieves results of a Swarming task.

  Arguments:
    swarming: base URL of the Swarming server.
    task_name: display name of the task; used when saving output files.
    task_ids: list of task ids, one per shard, to wait on.
    timeout: seconds to wait for results; 0 means no timeout.
    decorate: if True, wraps each shard's output in a decorated box.
    print_status_updates: if True, prints periodic status updates while
        waiting.
    task_summary_json: if not None, path where the summary JSON is written.
    task_output_dir: if not None, directory where output files are stored.

  Returns:
    The first non-zero shard exit code, or 1 if any shard is missing a result.
  """
  # Collect summary JSON and output files (if task_output_dir is not None).
  output_collector = TaskOutputCollector(
      task_output_dir, task_name, len(task_ids))

  seen_shards = set()
  exit_code = 0
  total_duration = 0
  try:
    for index, output in yield_results(
        swarming, task_ids, timeout, None, print_status_updates,
        output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. Default to
      # failure if there was no process that even started.
      shard_exit_code = 1
      # 'not x' sorts non-zero codes first so [0] is the first failure, if any.
      shard_exit_codes = sorted(output['exit_codes'], key=lambda x: not x)
      if shard_exit_codes:
        shard_exit_code = shard_exit_codes[0]
      if shard_exit_code:
        exit_code = shard_exit_code

      shard_duration = sum(i for i in output['durations'] if i)
      total_duration += shard_duration
      if decorate:
        print(decorate_shard_output(
            swarming, index, output, shard_exit_code, shard_duration))
        if len(seen_shards) < len(task_ids):
          print('')
      else:
        print('%s: %s %d' % (output['bot_id'], output['id'], shard_exit_code))
        # Use a distinct name for each command's output so the shard result
        # dict 'output' is not shadowed by the loop variable.
        for command_output in output['outputs']:
          if not command_output:
            continue
          command_output = command_output.rstrip()
          if command_output:
            print(''.join(' %s\n' % l for l in command_output.splitlines()))
  finally:
    # Always finalize the collector so partial results are saved even on
    # timeout or interruption.
    summary = output_collector.finalize()
    if task_summary_json:
      tools.write_json(task_summary_json, summary, False)

  if decorate and total_duration:
    print('Total duration: %.1fs' % total_duration)

  if len(seen_shards) != len(task_ids):
    missing_shards = [x for x in range(len(task_ids)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000789
790
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -0500791### Commands.
792
793
794def abort_task(_swarming, _manifest):
795 """Given a task manifest that was triggered, aborts its execution."""
796 # TODO(vadimsh): No supported by the server yet.
797
798
def add_filter_options(parser):
  """Registers the bot dimension filtering options on |parser|.

  The group is also exposed as parser.filter_group so callers can append
  more filtering options to it.
  """
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  group.add_option(
      '-d', '--dimension', default=[], action='append', nargs=2,
      dest='dimensions', metavar='FOO bar',
      help='dimension to filter on')
  parser.filter_group = group
  parser.add_option_group(group)
806
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400807
def add_sharding_options(parser):
  """Registers the --shards option on |parser|.

  The group is also exposed as parser.sharding_group.
  """
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  group.add_option(
      '--shards', type='int', default=1,
      help='Number of shards to trigger and collect.')
  parser.sharding_group = group
  parser.add_option_group(group)
814
815
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming.

  Registers isolate server options, bot dimension filters and a 'Task
  properties' option group. The order of add_option calls determines the
  order in --help output.
  """
  isolateserver.add_isolate_server_options(parser)
  add_filter_options(parser)

  parser.task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server')
  parser.task_group.add_option(
      '-e', '--env', default=[], action='append', nargs=2, metavar='FOO bar',
      help='Environment variables to set')
  parser.task_group.add_option(
      '--priority', type='int', default=100,
      help='The lower value, the more important the task is')
  parser.task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. Defaults to '
           '<base_name>/<dimensions>/<isolated hash>/<timestamp> if an '
           'isolated file is provided, if a hash is provided, it defaults to '
           '<user>/<dimensions>/<isolated hash>/<timestamp>')
  parser.task_group.add_option(
      '--tags', action='append', default=[],
      help='Tags to assign to the task.')
  parser.task_group.add_option(
      '--user', default='',
      help='User associated with the task. Defaults to authenticated user on '
           'the server.')
  parser.task_group.add_option(
      '--idempotent', action='store_true', default=False,
      help='When set, the server will actively try to find a previous task '
           'with the same parameter and return this result instead if possible')
  parser.task_group.add_option(
      '--expiration', type='int', default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  # Deprecated alias for --expiration, hidden from --help output.
  parser.task_group.add_option(
      '--deadline', type='int', dest='expiration',
      help=tools.optparse.SUPPRESS_HELP)
  parser.task_group.add_option(
      '--hard-timeout', type='int', default=60*60,
      help='Seconds to allow the task to complete.')
  parser.task_group.add_option(
      '--io-timeout', type='int', default=20*60,
      help='Seconds to allow the task to be silent.')
  parser.task_group.add_option(
      '--raw-cmd', action='store_true', default=False,
      help='When set, the command after -- is used as-is without run_isolated. '
           'In this case, no .isolated file is expected.')
  parser.add_option_group(parser.task_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000866
867
def process_trigger_options(parser, options, args):
  """Processes trigger options and uploads files to isolate server if necessary.

  Validates the parsed options (aborting via parser.error on bad input) and
  returns a TaskRequest describing the task to trigger.
  """
  # optparse collected these as lists of (key, value) pairs.
  options.dimensions = dict(options.dimensions)
  options.env = dict(options.env)

  upload_data = []
  if not options.dimensions:
    parser.error('Please at least specify one --dimension')
  if options.raw_cmd:
    # Raw mode: the command is taken verbatim from the trailing arguments.
    if not args:
      parser.error(
          'Arguments with --raw-cmd should be passed after -- as command '
          'delimiter.')
    if options.isolate_server:
      parser.error('Can\'t use both --raw-cmd and --isolate-server.')

    command = args
    if not options.task_name:
      # Synthesize a default task name out of the user and the dimensions.
      dims = '_'.join(
          '%s=%s' % item for item in sorted(options.dimensions.iteritems()))
      options.task_name = u'%s/%s' % (options.user, dims)
  else:
    # Isolated mode: resolve the .isolated and upload files when needed.
    isolateserver.process_isolate_server_options(parser, options, False)
    try:
      command, upload_data = isolated_handle_options(options, args)
    except ValueError as e:
      parser.error(str(e))

  return TaskRequest(
      command=command,
      data=upload_data,
      dimensions=options.dimensions,
      env=options.env,
      expiration=options.expiration,
      hard_timeout=options.hard_timeout,
      idempotent=options.idempotent,
      io_timeout=options.io_timeout,
      name=options.task_name,
      priority=options.priority,
      tags=options.tags,
      user=options.user,
      verbose=options.verbose)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000913
914
def add_collect_options(parser):
  """Adds the options used to fetch and render task results."""
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=80*60.,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')
  parser.group_logging.add_option(
      '--decorate', action='store_true', help='Decorate output')
  parser.group_logging.add_option(
      '--print-status-updates', action='store_true',
      help='Print periodic status updates')
  parser.task_output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group.add_option(
      '--task-summary-json',
      metavar='FILE',
      help='Dump a summary of task results to this file as json. It contains '
           'only shards statuses as know to server directly. Any output files '
           'emitted by the task can be collected by using --task-output-dir')
  parser.task_output_group.add_option(
      '--task-output-dir',
      metavar='DIR',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains per-shard directory with output files produced '
           'by shards: <task-output-dir>/<zero-based-shard-index>/.')
  parser.add_option_group(parser.task_output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000941
942
def CMDbots(parser, args):
  """Returns information about the bots connected to the Swarming server.

  Pages through the server's bot listing endpoint, then prints the id (and
  optionally dimensions and current task) of every bot matching the filters.
  """
  add_filter_options(parser)
  parser.filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  parser.filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  parser.filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  bots = []
  cursor = None
  limit = 250
  # Iterate via cursors: the server caps each response, so keep requesting
  # until it returns an empty cursor.
  base_url = options.swarming + '/swarming/api/v1/client/bots?limit=%d' % limit
  while True:
    url = base_url
    if cursor:
      url += '&cursor=%s' % urllib.quote(cursor)
    data = net.url_read_json(url)
    if data is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    bots.extend(data['items'])
    cursor = data['cursor']
    if not cursor:
      break

  for bot in natsort.natsorted(bots, key=lambda x: x['id']):
    # Apply the liveness filters first.
    if options.dead_only:
      if not bot['is_dead']:
        continue
    elif not options.keep_dead and bot['is_dead']:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = bot['dimensions']
    for key, value in options.dimensions:
      if key not in dimensions:
        break
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      if isinstance(dimensions[key], list):
        if value not in dimensions[key]:
          break
      else:
        if value != dimensions[key]:
          break
    else:
      # for/else: only reached when no requested dimension failed to match.
      print bot['id']
      if not options.bare:
        print ' %s' % json.dumps(dimensions, sort_keys=True)
        if bot.get('task_id'):
          print ' task: %s' % bot['task_id']
  return 0
1007
1008
@subcommand.usage('--json file | task_id...')
def CMDcollect(parser, args):
  """Retrieves results of one or multiple Swarming task by its ID.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.

  Task ids come either from the command line or from a --json file previously
  written by 'trigger --dump-json'; the two forms are mutually exclusive.
  """
  add_collect_options(parser)
  parser.add_option(
      '-j', '--json',
      help='Load the task ids from .json as saved by trigger --dump-json')
  (options, args) = parser.parse_args(args)
  if not args and not options.json:
    parser.error('Must specify at least one task id or --json.')
  if args and options.json:
    parser.error('Only use one of task id or --json.')

  if options.json:
    try:
      with open(options.json) as f:
        # Order the ids by shard index so output lines up with shard numbers.
        tasks = sorted(
            json.load(f)['tasks'].itervalues(), key=lambda x: x['shard_index'])
        args = [t['task_id'] for t in tasks]
    except (KeyError, IOError, ValueError):
      parser.error('Failed to parse %s' % options.json)
  else:
    # Task ids are lowercase hexadecimal strings; reject anything else early.
    valid = frozenset('0123456789abcdef')
    if any(not valid.issuperset(task_id) for task_id in args):
      parser.error('Task ids are 0-9a-f.')

  try:
    return collect(
        options.swarming,
        None,
        args,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
1052
1053
@subcommand.usage('[resource name]')
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'list' to gather the
  list of valid values from the server.

  Examples:
    Printing the list of known URLs:
      swarming.py query -S https://server-url list

    Listing last 50 tasks on a specific bot named 'swarm1'
      swarming.py query -S https://server-url --limit 50 bot/swarm1/tasks
  """
  # Maximum number of items requested per server round-trip.
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  parser.add_option(
      '--json', help='Path to JSON output file (otherwise prints to stdout)')
  (options, args) = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify only one resource name.')

  base_url = options.swarming + '/swarming/api/v1/client/' + args[0]
  url = base_url
  if options.limit:
    # The resource name may already carry a query string; append with the
    # right separator.
    merge_char = '&' if '?' in url else '?'
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    merge_char = '&' if '?' in base_url else '?'
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    new = net.url_read_json(url)
    if new is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new['items'])
    data['cursor'] = new['cursor']

  # Trim any overshoot from the last page and drop the now-useless cursor.
  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
  data.pop('cursor', None)

  if options.json:
    with open(options.json, 'w') as f:
      json.dump(data, f)
  else:
    try:
      json.dump(data, sys.stdout, indent=2, sort_keys=True)
      sys.stdout.write('\n')
    except IOError:
      # Ignore broken pipe, e.g. when output is piped to 'head'.
      pass
  return 0
1119
1120
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.

  Equivalent to 'trigger' immediately followed by 'collect' on the resulting
  shards.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
  except Failure as e:
    on_error.report(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, args[0], e.args[0]))
    return 1
  if not tasks:
    on_error.report('Failed to trigger the task.')
    return 1
  print('Triggered task: %s' % options.task_name)
  # Collect shards in shard_index order so output matches shard numbering.
  task_ids = [
    t['task_id']
    for t in sorted(tasks.itervalues(), key=lambda x: x['shard_index'])
  ]
  try:
    return collect(
        options.swarming,
        options.task_name,
        task_ids,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001161
1162
@subcommand.usage('task_id')
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This running locally the same commands that have been run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.

  Returns the first non-zero command exit code, or 0 if all commands succeed.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  # Fetch the original task request from the server.
  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    if data_host != swarming_host:
      # Data bundles may live on a different host than the Swarming server;
      # authenticate with that host too.
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    # Each data bundle is a zip extracted into the work directory.
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  env = None
  if properties['env']:
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    # subprocess on Python 2 wants byte strings for the environment.
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    # Keep the first failing exit code but still run the remaining commands.
    if not exit_code:
      exit_code = c
  return exit_code
1218
1219
@subcommand.usage("(hash|isolated) [-- extra_args|raw command]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
    if tasks:
      print('Triggered task: %s' % options.task_name)
      tasks_sorted = sorted(
          tasks.itervalues(), key=lambda x: x['shard_index'])
      if options.dump_json:
        # Persist the triggered shard ids so 'collect --json' can pick them up.
        data = {
          'base_task_name': options.task_name,
          'tasks': tasks,
        }
        tools.write_json(options.dump_json, data, True)
        print('To collect results, use:')
        print(' swarming.py collect -S %s --json %s' %
            (options.swarming, options.dump_json))
      else:
        print('To collect results, use:')
        print(' swarming.py collect -S %s %s' %
            (options.swarming, ' '.join(t['task_id'] for t in tasks_sorted)))
      print('Or visit:')
      for t in tasks_sorted:
        print(' ' + t['view_url'])
    # 0 when at least one task was triggered, 1 otherwise.
    return int(not tasks)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001267
1268
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser shared by all swarming.py subcommands.

  Adds the --swarming server option and the authentication options on top of
  the standard logging options.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    self.server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group.add_option(
        '-S', '--swarming',
        metavar='URL', default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(self.server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, processes auth and resolves the --swarming server.

    Also fills options.user, when the subcommand defines it, with the
    identity reported by the server.
    """
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    user = self._process_swarming(options)
    # Only subcommands using add_trigger_options() have an 'user' option.
    if hasattr(options, 'user') and not options.user:
      options.user = user
    return options, args

  def _process_swarming(self, options):
    """Processes the --swarming option and aborts if not specified.

    Returns the identity as determined by the server.
    """
    if not options.swarming:
      self.error('--swarming is required.')
    try:
      options.swarming = net.fix_url(options.swarming)
    except ValueError as e:
      self.error('--swarming %s' % e)
    # Report uncaught exceptions from here on to the server's error endpoint.
    on_error.report_on_exception_exit(options.swarming)
    try:
      user = auth.ensure_logged_in(options.swarming)
    except ValueError as e:
      self.error(str(e))
    return user
maruel@chromium.org0437a732013-08-27 16:05:52 +00001307
1308
def main(args):
  """Dispatches |args| to the matching CMD* subcommand and returns its code."""
  parser = OptionParserSwarming(version=__version__)
  dispatcher = subcommand.CommandDispatcher(__name__)
  return dispatcher.execute(parser, args)
maruel@chromium.org0437a732013-08-27 16:05:52 +00001312
1313
if __name__ == '__main__':
  # Normalize stdout/stderr encoding and buffering before any output, then
  # enable ANSI colors and run the command dispatcher.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))