blob: 6b5963f5fa12fda0b46d1ad09f55d62b48cf6518 [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -05008__version__ = '0.6'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050010import collections
maruel@chromium.org0437a732013-08-27 16:05:52 +000011import json
12import logging
13import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070014import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000015import shutil
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040016import StringIO
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import subprocess
18import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070019import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000020import time
21import urllib
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040022import urlparse
23import zipfile
maruel@chromium.org0437a732013-08-27 16:05:52 +000024
25from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000028
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050029from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040030from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
maruel@chromium.org0437a732013-08-27 16:05:52 +000033from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000034from utils import tools
35from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000036
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040038import isolated_format
maruel@chromium.org7b844a62013-09-17 13:04:59 +000039import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000040import run_isolated
41
42
43ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050044
45
class Failure(Exception):
  """Generic failure."""
49
50
51### Isolated file handling.
52
53
def isolated_upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to Isolate Server and returns raw fetch URL.

  Args:
    isolate_server: URL of an Isolate Server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from.
  """
  # The swarming bot fetches this file with a plain HTTPS GET, so upload into
  # the 'default' namespace: the raw data served back is not compressed, which
  # is all the swarming_bot understands. This namespace is unrelated to the
  # |namespace| passed to run_isolated.py for storing isolated task files.
  logging.info('Zipping up and uploading files...')
  started = time.time()
  item = isolateserver.BufferItem(bundle.zip_into_buffer())
  with isolateserver.get_storage(isolate_server, 'default') as storage:
    pushed = storage.upload_items([item])
    fetch_url = storage.get_fetch_url(item)
  taken = time.time() - started
  if item in pushed:
    logging.info('Upload complete, time elapsed: %f', taken)
  else:
    logging.info('Zip file already on server, time elapsed: %f', taken)
  return fetch_url
81
82
def isolated_get_data(isolate_server):
  """Returns the 'data' section with all files necessary to bootstrap a task
  execution running an isolated task.

  It's mainly zipping run_isolated.zip over and over again.
  TODO(maruel): Get rid of this with.
  https://code.google.com/p/swarming/issues/detail?id=173
  """
  # Bundle this client directory plus an uncompressed run_isolated.zip.
  package = zip_package.ZipPackage(ROOT_DIR)
  package.add_buffer(
      'run_isolated.zip',
      run_isolated.get_as_zip_package().zip_into_buffer(compress=False))
  fetch_url = isolated_upload_zip_bundle(isolate_server, package)
  return [(fetch_url, 'swarm_data.zip')]
97
98
def isolated_get_run_commands(
    isolate_server, namespace, isolated_hash, extra_args, verbose):
  """Returns the 'commands' to run an isolated task via run_isolated.zip.

  Returns:
    commands list to be added to the request.
  """
  cmd = ['python', 'run_isolated.zip']
  cmd.extend(['--hash', isolated_hash])
  cmd.extend(['--isolate-server', isolate_server])
  cmd.extend(['--namespace', namespace])
  if verbose:
    cmd.append('--verbose')
  # Everything after '--' is forwarded by run_isolated.py to the task command.
  if extra_args:
    cmd.append('--')
    cmd.extend(extra_args)
  return cmd
119
120
def isolated_archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the Isolate Server.

  Args:
    isolate_server: URL of the Isolate Server to upload to.
    namespace: Isolate Server namespace to use.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm module matching the namespace (e.g. hashlib.sha1).
    verbose: verbosity level; each level adds one '--verbose' flag.

  Returns:
    Hash of the uploaded .isolated file, or None if the archival subprocess
    failed.
  """
  logging.info(
      'isolated_archive(%s, %s, %s)', isolate_server, namespace, isolated)
  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    'archive',
    '--isolate-server', isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Bug fix: 'verbose' used to be passed as subprocess.call()'s second
  # positional argument, which is 'bufsize', not a verbosity flag. Verbosity is
  # already forwarded via the '--verbose' flags above.
  if subprocess.call(cmd):
    return None
  return isolated_format.hash_file(isolated, algo)
139
140
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Archives a .isolated file if needed.

  Returns the file hash to trigger and a bool specifying if it was a file (True)
  or a hash (False).
  """
  # A .isolated path: archive it first, then trigger by the resulting hash.
  if arg.endswith('.isolated'):
    archived_hash = isolated_archive(
        isolate_server, namespace, arg, algo, verbose)
    if archived_hash:
      return archived_hash, True
    on_error.report('Archival failure %s' % arg)
    return None, True
  # Otherwise it must already be a valid hash; pass it through untouched.
  if isolated_format.is_valid_hash(arg, algo):
    return arg, False
  on_error.report('Invalid hash %s' % arg)
  return None, False
158
159
def isolated_handle_options(options, args):
  """Handles isolated arguments.

  Returns:
    tuple(command, data).
  """
  # Split off everything after '--'; it is forwarded to the isolated command.
  if '--' in args:
    separator = args.index('--')
    isolated_cmd_args = args[separator+1:]
    args = args[:separator]
  else:
    # optparse eats '--' sometimes.
    isolated_cmd_args = args[1:]
    args = args[:1]
  if len(args) != 1:
    raise ValueError('Must pass one .isolated file or its hash (sha1).')

  isolated_hash, is_file = isolated_to_hash(
      options.isolate_server, options.namespace, args[0],
      isolated_format.get_hash_algo(options.namespace), options.verbose)
  if not isolated_hash:
    raise ValueError('Invalid argument %s' % args[0])

  command = isolated_get_run_commands(
      options.isolate_server, options.namespace, isolated_hash,
      isolated_cmd_args, options.verbose)

  # Derive a default task name when none was given: the .isolated base name if
  # a file was passed (instead of a hash), otherwise the user name, followed by
  # the sorted dimensions and the isolated hash.
  if not options.task_name:
    key = (
        os.path.splitext(os.path.basename(args[0]))[0]
        if is_file else options.user)
    options.task_name = '%s/%s/%s' % (
        key,
        '_'.join(
            '%s=%s' % (k, v)
            for k, v in sorted(options.dimensions.iteritems())),
        isolated_hash)

  try:
    data = isolated_get_data(options.isolate_server)
  except (IOError, OSError):
    on_error.report('Failed to upload the zip file')
    raise ValueError('Failed to upload the zip file')

  return command, data
209
210
211### Triggering.
212
213
# Describes a single task to trigger. Serialized to the server's wire format
# by task_request_to_raw_request(); 'verbose' is client-side only and is not
# part of the raw request. Field order is part of the constructor interface.
TaskRequest = collections.namedtuple(
    'TaskRequest',
    [
      'command',       # list of strings; the command the bot runs.
      'data',          # list of (url, filename) pairs to bootstrap the task.
      'dimensions',    # dict selecting which bots may run the task.
      'env',           # dict of environment variables for the task.
      'expiration',    # seconds before the pending request expires.
      'hard_timeout',  # execution timeout in seconds.
      'idempotent',    # if True, server may dedupe against a previous result.
      'io_timeout',    # I/O timeout in seconds.
      'name',          # task name displayed on the server.
      'priority',      # scheduling priority; server may reset it.
      'tags',          # list of 'key:value' tags.
      'user',          # user the task is triggered on behalf of.
      'verbose',       # client-side verbosity flag.
    ])
231
232
def task_request_to_raw_request(task_request):
  """Returns the json dict expected by the Swarming server for new request.

  This is for the v1 client Swarming API.
  """
  properties = {
    'commands': [task_request.command],
    'data': task_request.data,
    'dimensions': task_request.dimensions,
    'env': task_request.env,
    'execution_timeout_secs': task_request.hard_timeout,
    'io_timeout_secs': task_request.io_timeout,
    'idempotent': task_request.idempotent,
  }
  return {
    'name': task_request.name,
    # Propagate the current task ID (if any) so the server can track the
    # parent/child task hierarchy.
    'parent_task_id': os.environ.get('SWARMING_TASK_ID', ''),
    'priority': task_request.priority,
    'properties': properties,
    'scheduling_expiration_secs': task_request.expiration,
    'tags': task_request.tags,
    'user': task_request.user,
  }
255
256
def swarming_handshake(swarming):
  """Initiates the connection to the Swarming server.

  Returns the XSRF token to use on subsequent requests, or None on failure.
  """
  response = net.url_read_json(
      swarming + '/swarming/api/v1/client/handshake',
      headers={'X-XSRF-Token-Request': '1'},
      data={})
  if not response:
    logging.error('Failed to handshake with server')
    return None
  logging.info('Connected to server version: %s', response['server_version'])
  return response['xsrf_token']
269
270
def swarming_trigger(swarming, raw_request, xsrf_token):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])

  result = net.url_read_json(
      swarming + '/swarming/api/v1/client/request',
      data=raw_request,
      headers={'X-XSRF-Token': xsrf_token})
  if result:
    return result
  on_error.report('Failed to trigger task %s' % raw_request['name'])
  return None
296
297
def setup_googletest(env, shards, index):
  """Sets googletest specific environment variables."""
  if shards <= 1:
    # Single shard: leave the caller's environment untouched.
    return env
  # Copy so the caller's dict is not mutated.
  sharded = env.copy()
  sharded['GTEST_SHARD_INDEX'] = str(index)
  sharded['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded
305
306
def trigger_task_shards(swarming, task_request, shards):
  """Triggers one or many subtasks of a sharded task.

  Args:
    swarming: URL of the Swarming server.
    task_request: TaskRequest describing the task to trigger.
    shards: number of shards to trigger; when > 1, each shard gets googletest
        sharding environment variables and a ':<index>:<shards>' name suffix.

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    None in case of failure.
  """
  def convert(index):
    # Specializes the base request for shard |index| (no-op when not sharded).
    req = task_request
    if shards > 1:
      req = req._replace(
          env=setup_googletest(req.env, shards, index),
          name='%s:%s:%s' % (req.name, index, shards))
    return task_request_to_raw_request(req)

  requests = [convert(index) for index in xrange(shards)]
  xsrf_token = swarming_handshake(swarming)
  if not xsrf_token:
    return None
  tasks = {}
  priority_warning = False
  for index, request in enumerate(requests):
    task = swarming_trigger(swarming, request, xsrf_token)
    if not task:
      break
    logging.info('Request result: %s', task)
    # The server may reset the priority; warn the user once if it did.
    if (not priority_warning and
        task['request']['priority'] != task_request.priority):
      priority_warning = True
      print >> sys.stderr, (
          'Priority was reset to %s' % task['request']['priority'])
    tasks[request['name']] = {
      'shard_index': index,
      'task_id': task['task_id'],
      'view_url': '%s/user/task/%s' % (swarming, task['task_id']),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(requests):
    if tasks:
      print >> sys.stderr, 'Only %d shard(s) out of %d were triggered' % (
          len(tasks), len(requests))
    for task_dict in tasks.itervalues():
      abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
354
355
356### Collection.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000357
358
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700359# How often to print status updates to stdout in 'collect'.
360STATUS_UPDATE_INTERVAL = 15 * 60.
361
Marc-Antoine Ruel2f6581a2014-10-03 11:09:53 -0400362
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400363class State(object):
364 """States in which a task can be.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000365
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400366 WARNING: Copy-pasted from appengine/swarming/server/task_result.py. These
367 values are part of the API so if they change, the API changed.
368
369 It's in fact an enum. Values should be in decreasing order of importance.
370 """
371 RUNNING = 0x10
372 PENDING = 0x20
373 EXPIRED = 0x30
374 TIMED_OUT = 0x40
375 BOT_DIED = 0x50
376 CANCELED = 0x60
377 COMPLETED = 0x70
378
379 STATES = (RUNNING, PENDING, EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
380 STATES_RUNNING = (RUNNING, PENDING)
381 STATES_NOT_RUNNING = (EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
382 STATES_DONE = (TIMED_OUT, COMPLETED)
383 STATES_ABANDONED = (EXPIRED, BOT_DIED, CANCELED)
384
385 _NAMES = {
386 RUNNING: 'Running',
387 PENDING: 'Pending',
388 EXPIRED: 'Expired',
389 TIMED_OUT: 'Execution timed out',
390 BOT_DIED: 'Bot died',
391 CANCELED: 'User canceled',
392 COMPLETED: 'Completed',
393 }
394
395 @classmethod
396 def to_string(cls, state):
397 """Returns a user-readable string representing a State."""
398 if state not in cls._NAMES:
399 raise ValueError('Invalid state %s' % state)
400 return cls._NAMES[state]
maruel@chromium.org0437a732013-08-27 16:05:52 +0000401
402
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700403class TaskOutputCollector(object):
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700404 """Assembles task execution summary (for --task-summary-json output).
405
406 Optionally fetches task outputs from isolate server to local disk (used when
407 --task-output-dir is passed).
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700408
409 This object is shared among multiple threads running 'retrieve_results'
410 function, in particular they call 'process_shard_result' method in parallel.
411 """
412
413 def __init__(self, task_output_dir, task_name, shard_count):
414 """Initializes TaskOutputCollector, ensures |task_output_dir| exists.
415
416 Args:
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700417 task_output_dir: (optional) local directory to put fetched files to.
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700418 task_name: name of the swarming task results belong to.
419 shard_count: expected number of task shards.
420 """
421 self.task_output_dir = task_output_dir
422 self.task_name = task_name
423 self.shard_count = shard_count
424
425 self._lock = threading.Lock()
426 self._per_shard_results = {}
427 self._storage = None
428
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700429 if self.task_output_dir and not os.path.isdir(self.task_output_dir):
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700430 os.makedirs(self.task_output_dir)
431
Vadim Shtayurab450c602014-05-12 19:23:25 -0700432 def process_shard_result(self, shard_index, result):
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700433 """Stores results of a single task shard, fetches output files if necessary.
434
Marc-Antoine Ruele4dcbb82014-10-01 09:30:56 -0400435 Modifies |result| in place.
436
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700437 Called concurrently from multiple threads.
438 """
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700439 # Sanity check index is in expected range.
Vadim Shtayurab450c602014-05-12 19:23:25 -0700440 assert isinstance(shard_index, int)
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700441 if shard_index < 0 or shard_index >= self.shard_count:
442 logging.warning(
443 'Shard index %d is outside of expected range: [0; %d]',
444 shard_index, self.shard_count - 1)
445 return
446
Marc-Antoine Ruele4dcbb82014-10-01 09:30:56 -0400447 assert not 'isolated_out' in result
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400448 result['isolated_out'] = None
449 for output in result['outputs']:
450 isolated_files_location = extract_output_files_location(output)
451 if isolated_files_location:
452 if result['isolated_out']:
453 raise ValueError('Unexpected two task with output')
454 result['isolated_out'] = isolated_files_location
Kevin Graneyc2c3b9e2014-08-26 09:04:17 -0400455
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700456 # Store result dict of that shard, ignore results we've already seen.
457 with self._lock:
458 if shard_index in self._per_shard_results:
459 logging.warning('Ignoring duplicate shard index %d', shard_index)
460 return
461 self._per_shard_results[shard_index] = result
462
463 # Fetch output files if necessary.
Marc-Antoine Ruele4dcbb82014-10-01 09:30:56 -0400464 if self.task_output_dir and result['isolated_out']:
465 storage = self._get_storage(
466 result['isolated_out']['server'],
467 result['isolated_out']['namespace'])
468 if storage:
469 # Output files are supposed to be small and they are not reused across
470 # tasks. So use MemoryCache for them instead of on-disk cache. Make
471 # files writable, so that calling script can delete them.
472 isolateserver.fetch_isolated(
473 result['isolated_out']['hash'],
474 storage,
475 isolateserver.MemoryCache(file_mode_mask=0700),
476 os.path.join(self.task_output_dir, str(shard_index)),
477 False)
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700478
479 def finalize(self):
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700480 """Assembles and returns task summary JSON, shutdowns underlying Storage."""
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700481 with self._lock:
482 # Write an array of shard results with None for missing shards.
483 summary = {
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700484 'shards': [
485 self._per_shard_results.get(i) for i in xrange(self.shard_count)
486 ],
487 }
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700488 # Write summary.json to task_output_dir as well.
489 if self.task_output_dir:
490 tools.write_json(
491 os.path.join(self.task_output_dir, 'summary.json'),
492 summary,
493 False)
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700494 if self._storage:
495 self._storage.close()
496 self._storage = None
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700497 return summary
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700498
499 def _get_storage(self, isolate_server, namespace):
500 """Returns isolateserver.Storage to use to fetch files."""
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700501 assert self.task_output_dir
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700502 with self._lock:
503 if not self._storage:
504 self._storage = isolateserver.get_storage(isolate_server, namespace)
505 else:
506 # Shards must all use exact same isolate server and namespace.
507 if self._storage.location != isolate_server:
508 logging.error(
509 'Task shards are using multiple isolate servers: %s and %s',
510 self._storage.location, isolate_server)
511 return None
512 if self._storage.namespace != namespace:
513 logging.error(
514 'Task shards are using multiple namespaces: %s and %s',
515 self._storage.namespace, namespace)
516 return None
517 return self._storage
518
519
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700520def extract_output_files_location(task_log):
521 """Task log -> location of task output files to fetch.
522
523 TODO(vadimsh,maruel): Use side-channel to get this information.
524 See 'run_tha_test' in run_isolated.py for where the data is generated.
525
526 Returns:
527 Tuple (isolate server URL, namespace, isolated hash) on success.
528 None if information is missing or can not be parsed.
529 """
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400530 if not task_log:
531 return None
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700532 match = re.search(
533 r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
534 task_log,
535 re.DOTALL)
536 if not match:
537 return None
538
539 def to_ascii(val):
540 if not isinstance(val, basestring):
541 raise ValueError()
542 return val.encode('ascii')
543
544 try:
545 data = json.loads(match.group(1))
546 if not isinstance(data, dict):
547 raise ValueError()
548 isolated_hash = to_ascii(data['hash'])
549 namespace = to_ascii(data['namespace'])
550 isolate_server = to_ascii(data['storage'])
551 if not file_path.is_url(isolate_server):
552 raise ValueError()
Kevin Graneyc2c3b9e2014-08-26 09:04:17 -0400553 data = {
554 'hash': isolated_hash,
555 'namespace': namespace,
556 'server': isolate_server,
557 'view_url': '%s/browse?%s' % (isolate_server, urllib.urlencode(
558 [('namespace', namespace), ('hash', isolated_hash)])),
559 }
560 return data
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700561 except (KeyError, ValueError):
562 logging.warning(
563 'Unexpected value of run_isolated_out_hack: %s', match.group(1))
564 return None
565
566
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -0500567def now():
568 """Exists so it can be mocked easily."""
569 return time.time()
570
571
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700572def retrieve_results(
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400573 base_url, shard_index, task_id, timeout, should_stop, output_collector):
574 """Retrieves results for a single task ID.
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700575
Vadim Shtayurab450c602014-05-12 19:23:25 -0700576 Returns:
577 <result dict> on success.
578 None on failure.
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700579 """
maruel@chromium.org814d23f2013-10-01 19:08:00 +0000580 assert isinstance(timeout, float), timeout
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400581 result_url = '%s/swarming/api/v1/client/task/%s' % (base_url, task_id)
582 output_url = '%s/swarming/api/v1/client/task/%s/output/all' % (
583 base_url, task_id)
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700584 started = now()
585 deadline = started + timeout if timeout else None
586 attempt = 0
587
588 while not should_stop.is_set():
589 attempt += 1
590
591 # Waiting for too long -> give up.
592 current_time = now()
593 if deadline and current_time >= deadline:
594 logging.error('retrieve_results(%s) timed out on attempt %d',
595 base_url, attempt)
596 return None
597
598 # Do not spin too fast. Spin faster at the beginning though.
599 # Start with 1 sec delay and for each 30 sec of waiting add another second
600 # of delay, until hitting 15 sec ceiling.
601 if attempt > 1:
602 max_delay = min(15, 1 + (current_time - started) / 30.0)
603 delay = min(max_delay, deadline - current_time) if deadline else max_delay
604 if delay > 0:
605 logging.debug('Waiting %.1f sec before retrying', delay)
606 should_stop.wait(delay)
607 if should_stop.is_set():
608 return None
609
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400610 # Disable internal retries in net.url_read_json, since we are doing retries
611 # ourselves.
612 # TODO(maruel): We'd need to know if it's a 404 and not retry at all.
613 result = net.url_read_json(result_url, retry_50x=False)
614 if not result:
Marc-Antoine Ruel200b3952014-08-14 11:07:44 -0400615 continue
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400616 if result['state'] in State.STATES_NOT_RUNNING:
617 out = net.url_read_json(output_url)
618 result['outputs'] = (out or {}).get('outputs', [])
619 if not result['outputs']:
620 logging.error('No output found for task %s', task_id)
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700621 # Record the result, try to fetch attached output files (if any).
622 if output_collector:
623 # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
Vadim Shtayurab450c602014-05-12 19:23:25 -0700624 output_collector.process_shard_result(shard_index, result)
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700625 return result
maruel@chromium.org0437a732013-08-27 16:05:52 +0000626
627
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700628def yield_results(
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400629 swarm_base_url, task_ids, timeout, max_threads, print_status_updates,
630 output_collector):
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500631 """Yields swarming task results from the swarming server as (index, result).
maruel@chromium.org0437a732013-08-27 16:05:52 +0000632
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700633 Duplicate shards are ignored. Shards are yielded in order of completion.
634 Timed out shards are NOT yielded at all. Caller can compare number of yielded
635 shards with len(task_keys) to verify all shards completed.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000636
637 max_threads is optional and is used to limit the number of parallel fetches
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500638 done. Since in general the number of task_keys is in the range <=10, it's not
maruel@chromium.org0437a732013-08-27 16:05:52 +0000639 worth normally to limit the number threads. Mostly used for testing purposes.
Marc-Antoine Ruel5c720342014-02-21 14:46:14 -0500640
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700641 output_collector is an optional instance of TaskOutputCollector that will be
642 used to fetch files produced by a task from isolate server to the local disk.
643
Marc-Antoine Ruel5c720342014-02-21 14:46:14 -0500644 Yields:
645 (index, result). In particular, 'result' is defined as the
646 GetRunnerResults() function in services/swarming/server/test_runner.py.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000647 """
maruel@chromium.org0437a732013-08-27 16:05:52 +0000648 number_threads = (
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400649 min(max_threads, len(task_ids)) if max_threads else len(task_ids))
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700650 should_stop = threading.Event()
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700651 results_channel = threading_utils.TaskChannel()
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700652
maruel@chromium.org0437a732013-08-27 16:05:52 +0000653 with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
654 try:
Vadim Shtayurab450c602014-05-12 19:23:25 -0700655 # Adds a task to the thread pool to call 'retrieve_results' and return
656 # the results together with shard_index that produced them (as a tuple).
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400657 def enqueue_retrieve_results(shard_index, task_id):
Vadim Shtayurab450c602014-05-12 19:23:25 -0700658 task_fn = lambda *args: (shard_index, retrieve_results(*args))
maruel@chromium.org0437a732013-08-27 16:05:52 +0000659 pool.add_task(
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400660 0, results_channel.wrap_task(task_fn), swarm_base_url, shard_index,
661 task_id, timeout, should_stop, output_collector)
Vadim Shtayurab450c602014-05-12 19:23:25 -0700662
663 # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400664 for shard_index, task_id in enumerate(task_ids):
665 enqueue_retrieve_results(shard_index, task_id)
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700666
667 # Wait for all of them to finish.
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400668 shards_remaining = range(len(task_ids))
669 active_task_count = len(task_ids)
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700670 while active_task_count:
Vadim Shtayurab450c602014-05-12 19:23:25 -0700671 shard_index, result = None, None
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700672 try:
Vadim Shtayurab450c602014-05-12 19:23:25 -0700673 shard_index, result = results_channel.pull(
674 timeout=STATUS_UPDATE_INTERVAL)
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700675 except threading_utils.TaskChannel.Timeout:
676 if print_status_updates:
677 print(
678 'Waiting for results from the following shards: %s' %
679 ', '.join(map(str, shards_remaining)))
680 sys.stdout.flush()
681 continue
682 except Exception:
683 logging.exception('Unexpected exception in retrieve_results')
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700684
685 # A call to 'retrieve_results' finished (successfully or not).
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700686 active_task_count -= 1
maruel@chromium.org0437a732013-08-27 16:05:52 +0000687 if not result:
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500688 logging.error('Failed to retrieve the results for a swarming key')
maruel@chromium.org0437a732013-08-27 16:05:52 +0000689 continue
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700690
Vadim Shtayurab450c602014-05-12 19:23:25 -0700691 # Yield back results to the caller.
692 assert shard_index in shards_remaining
693 shards_remaining.remove(shard_index)
694 yield shard_index, result
Vadim Shtayurab19319e2014-04-27 08:50:06 -0700695
maruel@chromium.org0437a732013-08-27 16:05:52 +0000696 finally:
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700697 # Done or aborted with Ctrl+C, kill the remaining threads.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000698 should_stop.set()
699
700
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400701def decorate_shard_output(
702 swarming, shard_index, result, shard_exit_code, shard_duration):
maruel@chromium.org0437a732013-08-27 16:05:52 +0000703 """Returns wrapped output for swarming task shard."""
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400704 url = '%s/user/task/%s' % (swarming, result['id'])
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400705 tag_header = 'Shard %d %s' % (shard_index, url)
Marc-Antoine Ruel9b17dae2014-10-17 16:28:43 -0400706 tag_footer = 'End of shard %d Duration: %.1fs Bot: %s Exit code %s' % (
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400707 shard_index, shard_duration, result['bot_id'], shard_exit_code)
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400708
709 tag_len = max(len(tag_header), len(tag_footer))
710 dash_pad = '+-%s-+\n' % ('-' * tag_len)
711 tag_header = '| %s |\n' % tag_header.ljust(tag_len)
712 tag_footer = '| %s |\n' % tag_footer.ljust(tag_len)
713
714 header = dash_pad + tag_header + dash_pad
715 footer = dash_pad + tag_footer + dash_pad[:-1]
716 output = '\n'.join(o for o in result['outputs'] if o).rstrip() + '\n'
717 return header + output + footer
maruel@chromium.org0437a732013-08-27 16:05:52 +0000718
719
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700720def collect(
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400721 swarming, task_name, task_ids, timeout, decorate, print_status_updates,
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400722 task_summary_json, task_output_dir):
Marc-Antoine Ruel7c543272013-11-26 13:26:15 -0500723 """Retrieves results of a Swarming task."""
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700724 # Collect summary JSON and output files (if task_output_dir is not None).
725 output_collector = TaskOutputCollector(
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400726 task_output_dir, task_name, len(task_ids))
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700727
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700728 seen_shards = set()
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400729 exit_code = 0
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400730 total_duration = 0
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700731 try:
732 for index, output in yield_results(
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400733 swarming, task_ids, timeout, None, print_status_updates,
734 output_collector):
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700735 seen_shards.add(index)
Vadim Shtayura473455a2014-05-14 15:22:35 -0700736
Marc-Antoine Ruel9b17dae2014-10-17 16:28:43 -0400737 # Grab first non-zero exit code as an overall shard exit code. Default to
738 # failure if there was no process that even started.
739 shard_exit_code = 1
740 shard_exit_codes = sorted(output['exit_codes'], key=lambda x: not x)
741 if shard_exit_codes:
742 shard_exit_code = shard_exit_codes[0]
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400743 if shard_exit_code:
744 exit_code = shard_exit_code
Vadim Shtayura473455a2014-05-14 15:22:35 -0700745
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400746 shard_duration = sum(i for i in output['durations'] if i)
747 total_duration += shard_duration
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700748 if decorate:
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400749 print(decorate_shard_output(
750 swarming, index, output, shard_exit_code, shard_duration))
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400751 if len(seen_shards) < len(task_ids):
752 print('')
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700753 else:
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400754 print('%s: %s %d' % (output['bot_id'], output['id'], shard_exit_code))
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400755 for output in output['outputs']:
756 if not output:
757 continue
758 output = output.rstrip()
759 if output:
760 print(''.join(' %s\n' % l for l in output.splitlines()))
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700761 finally:
Vadim Shtayurac8437bf2014-07-09 19:45:36 -0700762 summary = output_collector.finalize()
763 if task_summary_json:
764 tools.write_json(task_summary_json, summary, False)
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700765
Marc-Antoine Rueld59e8072014-10-21 18:54:45 -0400766 if decorate and total_duration:
767 print('Total duration: %.1fs' % total_duration)
768
Marc-Antoine Ruel12a7da42014-10-01 08:29:47 -0400769 if len(seen_shards) != len(task_ids):
770 missing_shards = [x for x in range(len(task_ids)) if x not in seen_shards]
Vadim Shtayura86a2cef2014-04-18 11:13:39 -0700771 print >> sys.stderr, ('Results from some shards are missing: %s' %
772 ', '.join(map(str, missing_shards)))
Vadim Shtayurac524f512014-05-15 09:54:56 -0700773 return 1
Vadim Shtayurae3fbd102014-04-29 17:05:21 -0700774
Marc-Antoine Ruel4e6b73d2014-10-03 18:00:05 -0400775 return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000776
777
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -0500778### Commands.
779
780
def abort_task(_swarming, _manifest):
  """Aborts the execution of a previously triggered task.

  Currently a no-op: the server does not expose an abort endpoint.
  """
  # TODO(vadimsh): Not supported by the server yet.
784
785
def add_filter_options(parser):
  """Adds the bot-dimension filtering options to |parser|."""
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  group.add_option(
      '-d', '--dimension', default=[], action='append', nargs=2,
      dest='dimensions', metavar='FOO bar',
      help='dimension to filter on')
  # Expose the group so callers (e.g. CMDbots) can append more options to it.
  parser.filter_group = group
  parser.add_option_group(group)
793
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400794
def add_sharding_options(parser):
  """Adds the --shards option used by trigger and run commands."""
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  group.add_option(
      '--shards', type='int', default=1,
      help='Number of shards to trigger and collect.')
  parser.sharding_group = group
  parser.add_option_group(group)
801
802
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming."""
  # The isolate server options are needed to upload/reference .isolated files.
  isolateserver.add_isolate_server_options(parser)
  add_filter_options(parser)

  parser.task_group = tools.optparse.OptionGroup(parser, 'Task properties')
  parser.task_group.add_option(
      '-e', '--env', default=[], action='append', nargs=2, metavar='FOO bar',
      help='Environment variables to set')
  parser.task_group.add_option(
      '--priority', type='int', default=100,
      help='The lower value, the more important the task is')
  parser.task_group.add_option(
      '-T', '--task-name',
      help='Display name of the task. Defaults to '
           '<base_name>/<dimensions>/<isolated hash>/<timestamp> if an '
           'isolated file is provided, if a hash is provided, it defaults to '
           '<user>/<dimensions>/<isolated hash>/<timestamp>')
  parser.task_group.add_option(
      '--tags', action='append', default=[],
      help='Tags to assign to the task.')
  parser.task_group.add_option(
      '--user', default='',
      help='User associated with the task. Defaults to authenticated user on '
           'the server.')
  parser.task_group.add_option(
      '--idempotent', action='store_true', default=False,
      help='When set, the server will actively try to find a previous task '
           'with the same parameter and return this result instead if possible')
  parser.task_group.add_option(
      '--expiration', type='int', default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  # --deadline is a deprecated alias of --expiration, hidden from --help.
  parser.task_group.add_option(
      '--deadline', type='int', dest='expiration',
      help=tools.optparse.SUPPRESS_HELP)
  parser.task_group.add_option(
      '--hard-timeout', type='int', default=60*60,
      help='Seconds to allow the task to complete.')
  parser.task_group.add_option(
      '--io-timeout', type='int', default=20*60,
      help='Seconds to allow the task to be silent.')
  parser.task_group.add_option(
      '--raw-cmd', action='store_true', default=False,
      help='When set, the command after -- is used as-is without run_isolated. '
           'In this case, no .isolated file is expected.')
  parser.add_option_group(parser.task_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000850
851
def process_trigger_options(parser, options, args):
  """Validates the trigger options and builds the TaskRequest to send.

  Uploads files to the isolate server when an .isolated based task is used.
  Calls parser.error() (which exits) on invalid combinations of flags.
  """
  # nargs=2 append options arrive as lists of pairs; convert to dicts.
  options.dimensions = dict(options.dimensions)
  options.env = dict(options.env)
  if not options.dimensions:
    parser.error('Please at least specify one --dimension')

  data = []
  if options.raw_cmd:
    # The arguments after -- are the exact command to run on the bot.
    if not args:
      parser.error(
          'Arguments with --raw-cmd should be passed after -- as command '
          'delimiter.')
    if options.isolate_server:
      parser.error('Can\'t use both --raw-cmd and --isolate-server.')
    command = args
    if not options.task_name:
      # Default the task name to <user>/<sorted dimensions>.
      dims = '_'.join(
          '%s=%s' % (k, v)
          for k, v in sorted(options.dimensions.iteritems()))
      options.task_name = '%s/%s' % (options.user, dims)
  else:
    isolateserver.process_isolate_server_options(parser, options, False)
    try:
      command, data = isolated_handle_options(options, args)
    except ValueError as e:
      parser.error(str(e))

  return TaskRequest(
      command=command,
      data=data,
      dimensions=options.dimensions,
      env=options.env,
      expiration=options.expiration,
      hard_timeout=options.hard_timeout,
      idempotent=options.idempotent,
      io_timeout=options.io_timeout,
      name=options.task_name,
      priority=options.priority,
      tags=options.tags,
      user=options.user,
      verbose=options.verbose)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000897
898
def add_collect_options(parser):
  """Adds all options needed to wait for and fetch task results."""
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=80*60.,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')
  parser.group_logging.add_option(
      '--decorate', action='store_true', help='Decorate output')
  parser.group_logging.add_option(
      '--print-status-updates', action='store_true',
      help='Print periodic status updates')
  parser.task_output_group = tools.optparse.OptionGroup(parser, 'Task output')
  parser.task_output_group.add_option(
      '--task-summary-json',
      metavar='FILE',
      help='Dump a summary of task results to this file as json. It contains '
           'only shards statuses as know to server directly. Any output files '
           'emitted by the task can be collected by using --task-output-dir')
  parser.task_output_group.add_option(
      '--task-output-dir',
      metavar='DIR',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains per-shard directory with output files produced '
           'by shards: <task-output-dir>/<zero-based-shard-index>/.')
  parser.add_option_group(parser.task_output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000925
926
Marc-Antoine Ruel79940ae2014-09-23 17:55:41 -0400927def CMDbots(parser, args):
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400928 """Returns information about the bots connected to the Swarming server."""
929 add_filter_options(parser)
930 parser.filter_group.add_option(
Marc-Antoine Ruel28083112014-03-13 16:34:04 -0400931 '--dead-only', action='store_true',
932 help='Only print dead bots, useful to reap them and reimage broken bots')
933 parser.filter_group.add_option(
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400934 '-k', '--keep-dead', action='store_true',
935 help='Do not filter out dead bots')
936 parser.filter_group.add_option(
937 '-b', '--bare', action='store_true',
Marc-Antoine Ruele7b00162014-03-12 16:59:01 -0400938 help='Do not print out dimensions')
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400939 options, args = parser.parse_args(args)
Marc-Antoine Ruel28083112014-03-13 16:34:04 -0400940
941 if options.keep_dead and options.dead_only:
942 parser.error('Use only one of --keep-dead and --dead-only')
Vadim Shtayura6b555c12014-07-23 16:22:18 -0700943
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400944 bots = []
945 cursor = None
946 limit = 250
947 # Iterate via cursors.
948 base_url = options.swarming + '/swarming/api/v1/client/bots?limit=%d' % limit
949 while True:
950 url = base_url
951 if cursor:
Marc-Antoine Ruel79940ae2014-09-23 17:55:41 -0400952 url += '&cursor=%s' % urllib.quote(cursor)
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400953 data = net.url_read_json(url)
954 if data is None:
955 print >> sys.stderr, 'Failed to access %s' % options.swarming
956 return 1
Marc-Antoine Ruel79940ae2014-09-23 17:55:41 -0400957 bots.extend(data['items'])
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400958 cursor = data['cursor']
959 if not cursor:
960 break
961
962 for bot in natsort.natsorted(bots, key=lambda x: x['id']):
Marc-Antoine Ruel28083112014-03-13 16:34:04 -0400963 if options.dead_only:
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400964 if not bot['is_dead']:
Marc-Antoine Ruel28083112014-03-13 16:34:04 -0400965 continue
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400966 elif not options.keep_dead and bot['is_dead']:
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400967 continue
968
Marc-Antoine Ruele7b00162014-03-12 16:59:01 -0400969 # If the user requested to filter on dimensions, ensure the bot has all the
970 # dimensions requested.
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400971 dimensions = bot['dimensions']
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400972 for key, value in options.dimensions:
973 if key not in dimensions:
974 break
Marc-Antoine Ruele7b00162014-03-12 16:59:01 -0400975 # A bot can have multiple value for a key, for example,
976 # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
977 # be accepted.
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400978 if isinstance(dimensions[key], list):
979 if value not in dimensions[key]:
980 break
981 else:
982 if value != dimensions[key]:
983 break
984 else:
Marc-Antoine Ruelc6c579e2014-09-08 18:43:45 -0400985 print bot['id']
Marc-Antoine Ruele7b00162014-03-12 16:59:01 -0400986 if not options.bare:
Marc-Antoine Ruel0a620612014-08-13 15:47:07 -0400987 print ' %s' % json.dumps(dimensions, sort_keys=True)
Marc-Antoine Ruelfd491172014-11-19 19:26:13 -0500988 if bot.get('task_id'):
989 print ' task: %s' % bot['task_id']
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400990 return 0
991
992
@subcommand.usage('--json file | task_id...')
def CMDcollect(parser, args):
  """Retrieves results of one or multiple Swarming task by its ID.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  parser.add_option(
      '-j', '--json',
      help='Load the task ids from .json as saved by trigger --dump-json')
  options, args = parser.parse_args(args)
  # Exactly one of positional task ids or --json must be provided.
  if not args and not options.json:
    parser.error('Must specify at least one task id or --json.')
  if args and options.json:
    parser.error('Only use one of task id or --json.')

  if options.json:
    # Recover the task ids from a 'trigger --dump-json' file, in shard order.
    with open(options.json) as f:
      entries = json.load(f)['tasks'].itervalues()
    ordered = sorted(entries, key=lambda x: x['shard_index'])
    args = [entry['task_id'] for entry in ordered]
  else:
    hex_digits = frozenset('0123456789abcdef')
    if any(not hex_digits.issuperset(task_id) for task_id in args):
      parser.error('Task ids are 0-9a-f.')

  try:
    return collect(
        options.swarming,
        None,
        args,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
1033
1034
@subcommand.usage('[resource name]')
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'list' to gather the
  list of valid values from the server.

  Examples:
    Printing the list of known URLs:
      swarming.py query -S https://server-url list

    Listing last 50 tasks on a specific bot named 'swarm1'
      swarming.py query -S https://server-url --limit 50 bot/swarm1/tasks
  """
  # Server-side page size used when following cursors.
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  (options, args) = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify only one resource name.')

  base_url = options.swarming + '/swarming/api/v1/client/' + args[0]
  # The resource name may already embed a query string; pick the right
  # separator for any parameter we append.
  merge_char = '&' if '?' in base_url else '?'
  url = base_url
  if options.limit:
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    # Bug fix: the cursor URL previously always used '?', producing an invalid
    # URL when the resource already contained a query string; reuse the same
    # separator logic as the limit parameter above.
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    new = net.url_read_json(url)
    if new is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new['items'])
    data['cursor'] = new['cursor']

  # Enforce the user limit exactly and drop the now-stale cursor.
  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
    data.pop('cursor', None)

  json.dump(data, sys.stdout, indent=2, sort_keys=True)
  sys.stdout.write('\n')
  return 0
1090
1091
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and waits for its results.

  Combines trigger and collect: everything needed to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)

  # Trigger all the shards; report and bail out on failure.
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
  except Failure as e:
    on_error.report(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, args[0], e.args[0]))
    return 1
  if not tasks:
    on_error.report('Failed to trigger the task.')
    return 1
  print('Triggered task: %s' % options.task_name)

  # Wait on the shards in shard_index order.
  ordered_shards = sorted(tasks.itervalues(), key=lambda x: x['shard_index'])
  task_ids = [shard['task_id'] for shard in ordered_shards]
  try:
    return collect(
        options.swarming,
        options.task_name,
        task_ids,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001132
1133
@subcommand.usage('task_id')
def CMDreproduce(parser, args):
  """Runs locally a task that was triggered on the server.

  This runs locally the same commands that have been run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  # Download and extract each data bundle referenced by the task into 'work'.
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    # Data hosted on a different server may require its own authentication.
    if data_host != swarming_host:
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  env = None
  if properties['env']:
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    # subprocess on Python 2 wants byte strings in the environment mapping.
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  # Run every command in order; keep the first non-zero exit code.
  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    if not exit_code:
      exit_code = c
  return exit_code
1189
1190
@subcommand.usage("(hash|isolated) [-- extra_args|raw command]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
    if not tasks:
      # Nothing was triggered; equivalent to the historical int(not tasks).
      return 1
    print('Triggered task: %s' % options.task_name)
    by_shard = sorted(tasks.itervalues(), key=lambda x: x['shard_index'])
    if options.dump_json:
      data = {
        'base_task_name': options.task_name,
        'tasks': tasks,
      }
      tools.write_json(options.dump_json, data, True)
      print('To collect results, use:')
      print('  swarming.py collect -S %s --json %s' %
          (options.swarming, options.dump_json))
    else:
      print('To collect results, use:')
      print('  swarming.py collect -S %s %s' %
          (options.swarming, ' '.join(t['task_id'] for t in by_shard)))
    print('Or visit:')
    for t in by_shard:
      print('  ' + t['view_url'])
    return 0
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001238
1239
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser shared by all swarming.py subcommands.

  Adds the --swarming server option, authentication options, and validates
  the server URL / login state in parse_args.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    self.server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group.add_option(
        '-S', '--swarming',
        metavar='URL', default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(self.server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then processes auth and --swarming options."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    user = self._process_swarming(options)
    # Default --user (when the subcommand defines it) to the logged-in
    # identity returned by the server.
    if hasattr(options, 'user') and not options.user:
      options.user = user
    return options, args

  def _process_swarming(self, options):
    """Processes the --swarming option and aborts if not specified.

    Returns the identity as determined by the server.
    """
    if not options.swarming:
      self.error('--swarming is required.')
    try:
      options.swarming = net.fix_url(options.swarming)
    except ValueError as e:
      self.error('--swarming %s' % e)
    # Report uncaught exceptions to the server from now on.
    on_error.report_on_exception_exit(options.swarming)
    try:
      user = auth.ensure_logged_in(options.swarming)
    except ValueError as e:
      self.error(str(e))
    return user
maruel@chromium.org0437a732013-08-27 16:05:52 +00001278
1279
def main(args):
  """Dispatches |args| to the matching CMD* subcommand and returns its code."""
  parser = OptionParserSwarming(version=__version__)
  return subcommand.CommandDispatcher(__name__).execute(parser, args)
maruel@chromium.org0437a732013-08-27 16:05:52 +00001283
1284
if __name__ == '__main__':
  # Normalize stdout/stderr encoding before any output is emitted.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  # Enable ANSI color handling on Windows consoles.
  colorama.init()
  sys.exit(main(sys.argv[1:]))