blob: 0fb1ac05418e78ea4a813099d8ec335a4013c250 [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Marc-Antoine Ruel185ded42015-01-28 20:49:18 -05008__version__ = '0.6.1'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050010import collections
maruel@chromium.org0437a732013-08-27 16:05:52 +000011import json
12import logging
13import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070014import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000015import shutil
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040016import StringIO
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import subprocess
18import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070019import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000020import time
21import urllib
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040022import urlparse
23import zipfile
maruel@chromium.org0437a732013-08-27 16:05:52 +000024
25from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000028
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050029from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040030from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
maruel@chromium.org0437a732013-08-27 16:05:52 +000033from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000034from utils import tools
35from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000036
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040038import isolated_format
maruel@chromium.org7b844a62013-09-17 13:04:59 +000039import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000040import run_isolated
41
42
# Directory containing this script. Used to locate sibling tools such as
# isolate.py and as the root when building the run_isolated.zip bundle.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050044
45
class Failure(Exception):
  """Generic failure raised by this script."""
49
50
51### Isolated file handling.
52
53
def isolated_upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to Isolate Server and returns raw fetch URL.

  Args:
    isolate_server: URL of an Isolate Server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from.
  """
  # The swarming bot grabs this file with a plain HTTPS GET. Use the 'default'
  # namespace so the server returns raw (uncompressed) bytes, since the
  # swarming_bot doesn't understand compressed data. This namespace is
  # unrelated to the |namespace| passed to run_isolated.py that is used to
  # store files for the isolated task.
  logging.info('Zipping up and uploading files...')
  started = time.time()
  item = isolateserver.BufferItem(bundle.zip_into_buffer())
  with isolateserver.get_storage(isolate_server, 'default') as storage:
    pushed = storage.upload_items([item])
    fetch_url = storage.get_fetch_url(item)
  took = time.time() - started
  if item in pushed:
    logging.info('Upload complete, time elapsed: %f', took)
  else:
    logging.info('Zip file already on server, time elapsed: %f', took)
  return fetch_url
81
82
def isolated_get_data(isolate_server):
  """Returns the 'data' section with all files necessary to bootstrap a task
  execution running an isolated task.

  It's mainly zipping run_isolated.zip over and over again.
  TODO(maruel): Get rid of this with.
  https://code.google.com/p/swarming/issues/detail?id=173
  """
  package = zip_package.ZipPackage(ROOT_DIR)
  # run_isolated itself ships as a zip; embed it uncompressed inside the
  # bootstrap bundle.
  package.add_buffer(
      'run_isolated.zip',
      run_isolated.get_as_zip_package().zip_into_buffer(compress=False))
  url = isolated_upload_zip_bundle(isolate_server, package)
  return [(url, 'swarm_data.zip')]
97
98
def isolated_get_run_commands(
    isolate_server, namespace, isolated_hash, extra_args, verbose):
  """Builds the 'commands' list to run an isolated task via run_isolated.zip.

  Returns:
    commands list to be added to the request.
  """
  cmd = ['python', 'run_isolated.zip']
  cmd += ['--isolated', isolated_hash]
  cmd += ['--isolate-server', isolate_server]
  cmd += ['--namespace', namespace]
  if verbose:
    cmd += ['--verbose']
  # Everything after '--' is forwarded by run_isolated.py to the task command.
  if extra_args:
    cmd += ['--'] + list(extra_args)
  return cmd
119
120
def isolated_archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the Isolate Server.

  Shells out to isolate.py so the archival logic stays in a single place.

  Args:
    isolate_server: URL of the Isolate Server to upload to.
    namespace: namespace to store the files under.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm, as returned by isolated_format.get_hash_algo().
    verbose: verbosity level; each level adds one --verbose flag.

  Returns:
    Hash of the .isolated file on success, None if the archival failed.
  """
  logging.info(
      'isolated_archive(%s, %s, %s)', isolate_server, namespace, isolated)
  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    'archive',
    '--isolate-server', isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Bug fix: |verbose| was previously passed as the second positional argument
  # of subprocess.call(), i.e. as |bufsize|, which was never the intent.
  if subprocess.call(cmd):
    return None
  return isolated_format.hash_file(isolated, algo)
139
140
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Archives a .isolated file if needed.

  Returns the file hash to trigger and a bool specifying if it was a file (True)
  or a hash (False).
  """
  if arg.endswith('.isolated'):
    # A file path was given: archive it first, then trigger by its hash.
    digest = isolated_archive(isolate_server, namespace, arg, algo, verbose)
    if not digest:
      on_error.report('Archival failure %s' % arg)
      return None, True
    return digest, True
  if isolated_format.is_valid_hash(arg, algo):
    return arg, False
  on_error.report('Invalid hash %s' % arg)
  return None, False
158
159
def isolated_handle_options(options, args):
  """Handles '--isolated <isolated>', '<isolated>' and '-- <args...>' arguments.

  Mutates |options| in place: may set options.isolated (when a .isolated file
  path or hash is passed positionally) and options.task_name (when not given).

  Returns:
    tuple(command, data).
  """
  isolated_cmd_args = []
  if not options.isolated:
    # No --isolated flag: the first positional argument must be the .isolated
    # file or hash; everything after '--' goes to the task command.
    if '--' in args:
      index = args.index('--')
      isolated_cmd_args = args[index+1:]
      args = args[:index]
    else:
      # optparse eats '--' sometimes.
      isolated_cmd_args = args[1:]
      args = args[:1]
    if len(args) != 1:
      raise ValueError(
          'Use --isolated, --raw-cmd or \'--\' to pass arguments to the called '
          'process.')
    # Old code. To be removed eventually.
    options.isolated, is_file = isolated_to_hash(
        options.isolate_server, options.namespace, args[0],
        isolated_format.get_hash_algo(options.namespace), options.verbose)
    if not options.isolated:
      raise ValueError('Invalid argument %s' % args[0])
  elif args:
    is_file = False
    if '--' in args:
      index = args.index('--')
      isolated_cmd_args = args[index+1:]
      # With --isolated, only pass-through arguments (after '--') are allowed.
      if index != 0:
        raise ValueError('Unexpected arguments.')
    else:
      # optparse eats '--' sometimes.
      isolated_cmd_args = args
  # NOTE(review): if options.isolated is set, |args| is empty and
  # options.task_name is unset, |is_file| below is never assigned and this
  # raises NameError — presumably callers always pass a task name in that
  # path; verify against callers.

  command = isolated_get_run_commands(
      options.isolate_server, options.namespace, options.isolated,
      isolated_cmd_args, options.verbose)

  # If a file name was passed, use its base name as the task name key.
  # Otherwise, use the user name as an approximation of a task name.
  if not options.task_name:
    if is_file:
      key = os.path.splitext(os.path.basename(args[0]))[0]
    else:
      key = options.user
    options.task_name = u'%s/%s/%s' % (
        key,
        '_'.join(
          '%s=%s' % (k, v)
          for k, v in sorted(options.dimensions.iteritems())),
        options.isolated)

  try:
    data = isolated_get_data(options.isolate_server)
  except (IOError, OSError):
    on_error.report('Failed to upload the zip file')
    raise ValueError('Failed to upload the zip file')

  return command, data
222
223
224### Triggering.
225
226
# In-memory description of a single task to trigger, built from command line
# options and converted to the server's JSON format by
# task_request_to_raw_request().
TaskRequest = collections.namedtuple(
    'TaskRequest',
    [
      'command',
      'data',
      'dimensions',
      'env',
      'expiration',
      'hard_timeout',
      'idempotent',
      'io_timeout',
      'name',
      'priority',
      'tags',
      'user',
      'verbose',
    ])
244
245
def task_request_to_raw_request(task_request):
  """Converts a TaskRequest into the json dict the Swarming server expects.

  This is for the v1 client Swarming API.
  """
  properties = {
    'commands': [task_request.command],
    'data': task_request.data,
    'dimensions': task_request.dimensions,
    'env': task_request.env,
    'execution_timeout_secs': task_request.hard_timeout,
    'io_timeout_secs': task_request.io_timeout,
    'idempotent': task_request.idempotent,
  }
  return {
    'name': task_request.name,
    # SWARMING_TASK_ID is presumably set by the bot when this script runs
    # inside another Swarming task; empty string otherwise.
    'parent_task_id': os.environ.get('SWARMING_TASK_ID', ''),
    'priority': task_request.priority,
    'properties': properties,
    'scheduling_expiration_secs': task_request.expiration,
    'tags': task_request.tags,
    'user': task_request.user,
  }
268
269
def swarming_handshake(swarming):
  """Initiates the connection to the Swarming server.

  Returns:
    XSRF token string to use for subsequent requests, or None if the server
    did not answer the handshake.
  """
  response = net.url_read_json(
      swarming + '/swarming/api/v1/client/handshake',
      headers={'X-XSRF-Token-Request': '1'},
      data={})
  if not response:
    logging.error('Failed to handshake with server')
    return None
  logging.info('Connected to server version: %s', response['server_version'])
  return response['xsrf_token']
282
283
def swarming_trigger(swarming, raw_request, xsrf_token):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])
  result = net.url_read_json(
      swarming + '/swarming/api/v1/client/request',
      data=raw_request,
      headers={'X-XSRF-Token': xsrf_token})
  if result:
    return result
  on_error.report('Failed to trigger task %s' % raw_request['name'])
  return None
309
310
def setup_googletest(env, shards, index):
  """Sets googletest specific environment variables.

  Returns |env| unchanged for unsharded tasks; otherwise returns a copy with
  GTEST_SHARD_INDEX and GTEST_TOTAL_SHARDS set, leaving the original intact.
  """
  if shards <= 1:
    return env
  sharded = env.copy()
  sharded['GTEST_SHARD_INDEX'] = str(index)
  sharded['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded
318
319
def trigger_task_shards(swarming, task_request, shards):
  """Triggers one or many subtasks of a sharded task.

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    None in case of failure.
  """
  def convert(index):
    # Builds the raw request for shard |index|. When sharded, the shard name
    # is suffixed with ':index:shards' and GTEST_* env vars are injected.
    req = task_request
    if shards > 1:
      req = req._replace(
          env=setup_googletest(req.env, shards, index),
          name='%s:%s:%s' % (req.name, index, shards))
    return task_request_to_raw_request(req)

  requests = [convert(index) for index in xrange(shards)]
  xsrf_token = swarming_handshake(swarming)
  if not xsrf_token:
    return None
  tasks = {}
  priority_warning = False
  for index, request in enumerate(requests):
    task = swarming_trigger(swarming, request, xsrf_token)
    if not task:
      # Stop on first failure; leftovers are cleaned up below.
      break
    logging.info('Request result: %s', task)
    # The server may lower the requested priority; warn only once.
    if (not priority_warning and
        task['request']['priority'] != task_request.priority):
      priority_warning = True
      print >> sys.stderr, (
          'Priority was reset to %s' % task['request']['priority'])
    tasks[request['name']] = {
      'shard_index': index,
      'task_id': task['task_id'],
      'view_url': '%s/user/task/%s' % (swarming, task['task_id']),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(requests):
    if tasks:
      print >> sys.stderr, 'Only %d shard(s) out of %d were triggered' % (
          len(tasks), len(requests))
    for task_dict in tasks.itervalues():
      abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
367
368
369### Collection.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000370
371
# How often to print status updates to stdout in 'collect', in seconds.
STATUS_UPDATE_INTERVAL = 15 * 60.
374
Marc-Antoine Ruel2f6581a2014-10-03 11:09:53 -0400375
class State(object):
  """States in which a task can be.

  WARNING: Copy-pasted from appengine/swarming/server/task_result.py. These
  values are part of the API so if they change, the API changed.

  It's in fact an enum. Values should be in decreasing order of importance.
  """
  RUNNING = 0x10
  PENDING = 0x20
  EXPIRED = 0x30
  TIMED_OUT = 0x40
  BOT_DIED = 0x50
  CANCELED = 0x60
  COMPLETED = 0x70

  # Groupings used by callers to classify a task's state.
  STATES = (RUNNING, PENDING, EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_RUNNING = (RUNNING, PENDING)
  STATES_NOT_RUNNING = (EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_DONE = (TIMED_OUT, COMPLETED)
  STATES_ABANDONED = (EXPIRED, BOT_DIED, CANCELED)

  # Human readable names, keyed by state value.
  _NAMES = {
    RUNNING: 'Running',
    PENDING: 'Pending',
    EXPIRED: 'Expired',
    TIMED_OUT: 'Execution timed out',
    BOT_DIED: 'Bot died',
    CANCELED: 'User canceled',
    COMPLETED: 'Completed',
  }

  @classmethod
  def to_string(cls, state):
    """Returns a user-readable string representing a State."""
    try:
      return cls._NAMES[state]
    except KeyError:
      raise ValueError('Invalid state %s' % state)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000414
415
class TaskOutputCollector(object):
  """Assembles task execution summary (for --task-summary-json output).

  Optionally fetches task outputs from isolate server to local disk (used when
  --task-output-dir is passed).

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: (optional) local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir  # May be None (no file fetching).
    self.task_name = task_name
    self.shard_count = shard_count

    # Guards _per_shard_results and _storage; methods run on worker threads.
    self._lock = threading.Lock()
    # Maps shard_index -> result dict, filled by process_shard_result().
    self._per_shard_results = {}
    # Lazily created isolateserver.Storage, shared by all shards.
    self._storage = None

    if self.task_output_dir and not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Modifies |result| in place: adds an 'isolated_out' key extracted from the
    shard's textual outputs (or None).

    Called concurrently from multiple threads.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if shard_index < 0 or shard_index >= self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # At most one of the shard's outputs may embed an isolated output
    # location (see extract_output_files_location).
    assert not 'isolated_out' in result
    result['isolated_out'] = None
    for output in result['outputs']:
      isolated_files_location = extract_output_files_location(output)
      if isolated_files_location:
        if result['isolated_out']:
          raise ValueError('Unexpected two task with output')
        result['isolated_out'] = isolated_files_location

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary.
    if self.task_output_dir and result['isolated_out']:
      storage = self._get_storage(
          result['isolated_out']['server'],
          result['isolated_out']['namespace'])
      if storage:
        # Output files are supposed to be small and they are not reused across
        # tasks. So use MemoryCache for them instead of on-disk cache. Make
        # files writable, so that calling script can delete them.
        isolateserver.fetch_isolated(
            result['isolated_out']['hash'],
            storage,
            isolateserver.MemoryCache(file_mode_mask=0700),
            os.path.join(self.task_output_dir, str(shard_index)),
            False)

  def finalize(self):
    """Assembles and returns task summary JSON, shuts down underlying Storage."""
    with self._lock:
      # Write an array of shard results with None for missing shards.
      summary = {
        'shards': [
          self._per_shard_results.get(i) for i in xrange(self.shard_count)
        ],
      }
      # Write summary.json to task_output_dir as well.
      if self.task_output_dir:
        tools.write_json(
            os.path.join(self.task_output_dir, 'summary.json'),
            summary,
            False)
      if self._storage:
        self._storage.close()
        self._storage = None
      return summary

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    Returns None if shards disagree on the isolate server or namespace.
    """
    assert self.task_output_dir
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
      else:
        # Shards must all use exact same isolate server and namespace.
        if self._storage.location != isolate_server:
          logging.error(
              'Task shards are using multiple isolate servers: %s and %s',
              self._storage.location, isolate_server)
          return None
        if self._storage.namespace != namespace:
          logging.error(
              'Task shards are using multiple namespaces: %s and %s',
              self._storage.namespace, namespace)
          return None
      return self._storage
531
532
def extract_output_files_location(task_log):
  """Parses a task log for the location of output files produced by the task.

  The data is embedded as a JSON blob between [run_isolated_out_hack] markers
  in the task's output. See 'run_tha_test' in run_isolated.py for where the
  data is generated.
  TODO(vadimsh,maruel): Use side-channel to get this information.

  Returns:
    Dict with 'hash', 'namespace', 'server' and 'view_url' keys on success.
    None if information is missing or can not be parsed.
  """
  if not task_log:
    return None
  found = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not found:
    return None

  def as_ascii(value):
    # Values end up in URLs; reject non-strings and non-ascii.
    if not isinstance(value, basestring):
      raise ValueError()
    return value.encode('ascii')

  try:
    blob = json.loads(found.group(1))
    if not isinstance(blob, dict):
      raise ValueError()
    isolated_hash = as_ascii(blob['hash'])
    namespace = as_ascii(blob['namespace'])
    isolate_server = as_ascii(blob['storage'])
    if not file_path.is_url(isolate_server):
      raise ValueError()
    view_url = '%s/browse?%s' % (isolate_server, urllib.urlencode(
        [('namespace', namespace), ('hash', isolated_hash)]))
    return {
      'hash': isolated_hash,
      'namespace': namespace,
      'server': isolate_server,
      'view_url': view_url,
    }
  except (KeyError, ValueError):
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', found.group(1))
    return None
578
579
def now():
  """Returns the current time as a float epoch timestamp.

  Exists so it can be mocked easily in tests.
  """
  return time.time()
583
584
def retrieve_results(
    base_url, shard_index, task_id, timeout, should_stop, output_collector):
  """Retrieves results for a single task ID.

  Polls the server until the task reaches a non-running state, the deadline
  passes, or |should_stop| is set.

  Args:
    base_url: Swarming server URL.
    shard_index: index of this shard, forwarded to |output_collector|.
    task_id: id of the task to poll for.
    timeout: total polling budget in seconds; falsy means no deadline.
    should_stop: threading.Event checked to abort the wait early.
    output_collector: optional TaskOutputCollector fed the finished result.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  result_url = '%s/swarming/api/v1/client/task/%s' % (base_url, task_id)
  output_url = '%s/swarming/api/v1/client/task/%s/output/all' % (
      base_url, task_id)
  started = now()
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      # Never sleep past the deadline.
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read_json, since we are doing retries
    # ourselves.
    # TODO(maruel): We'd need to know if it's a 404 and not retry at all.
    result = net.url_read_json(result_url, retry_50x=False)
    if not result:
      continue
    if result['state'] in State.STATES_NOT_RUNNING:
      # Task is done; fetch its console output and attach it to the result.
      out = net.url_read_json(output_url)
      result['outputs'] = (out or {}).get('outputs', [])
      if not result['outputs']:
        logging.error('No output found for task %s', task_id)
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
  # |should_stop| was signaled before the task completed: implicitly None.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000639
640
def yield_results(
    swarm_base_url, task_ids, timeout, max_threads, print_status_updates,
    output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  # One polling thread per shard, optionally capped by |max_threads|.
  number_threads = (
      min(max_threads, len(task_ids)) if max_threads else len(task_ids))
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_id):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn), swarm_base_url, shard_index,
            task_id, timeout, should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_id in enumerate(task_ids):
        enqueue_retrieve_results(shard_index, task_id)

      # Wait for all of them to finish.
      # Python 2: range() returns a list, so .remove() below works.
      shards_remaining = range(len(task_ids))
      active_task_count = len(task_ids)
      while active_task_count:
        shard_index, result = None, None
        try:
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          # No shard finished within the interval; print progress and retry.
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          # Deliberately falls through: |result| stays None and the shard is
          # counted as finished (failed) below.
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
712
713
def decorate_shard_output(
    swarming, shard_index, result, shard_exit_code, shard_duration):
  """Returns a shard's output wrapped in an ASCII box with header and footer.

  The header carries the shard index and task URL; the footer carries the
  duration, bot id and exit code.
  """
  task_url = '%s/user/task/%s' % (swarming, result['id'])
  header_text = 'Shard %d %s' % (shard_index, task_url)
  footer_text = 'End of shard %d Duration: %.1fs Bot: %s Exit code %s' % (
      shard_index, shard_duration, result['bot_id'], shard_exit_code)

  # Both banners share the same width so the box edges line up.
  width = max(len(header_text), len(footer_text))
  edge = '+-%s-+\n' % ('-' * width)
  boxed_header = edge + ('| %s |\n' % header_text.ljust(width)) + edge
  # The final edge drops its trailing newline.
  boxed_footer = edge + ('| %s |\n' % footer_text.ljust(width)) + edge[:-1]
  body = '\n'.join(chunk for chunk in result['outputs'] if chunk)
  return boxed_header + body.rstrip() + '\n' + boxed_footer
maruel@chromium.org0437a732013-08-27 16:05:52 +0000731
732
def collect(
    swarming, task_name, task_ids, timeout, decorate, print_status_updates,
    task_summary_json, task_output_dir):
  """Retrieves results of a Swarming task.

  Arguments:
    swarming: Swarming server URL.
    task_name: display name, used only to label collected outputs.
    task_ids: list of task ids, one per shard, to wait on.
    timeout: seconds to wait for results, 0 for no timeout.
    decorate: if True, wraps each shard output in a decorated ASCII box.
    print_status_updates: if True, prints periodic status while waiting.
    task_summary_json: path to dump the summary as json, if set.
    task_output_dir: directory to store the shards' output files, if set.

  Returns:
    Process-style exit code: the first non-zero shard exit code, or 1 when
    some shards never reported back.
  """
  # Collect summary JSON and output files (if task_output_dir is not None).
  output_collector = TaskOutputCollector(
      task_output_dir, task_name, len(task_ids))

  seen_shards = set()
  exit_code = 0
  total_duration = 0
  try:
    # 'metadata' is one shard's result dict as yielded by yield_results.
    for index, metadata in yield_results(
        swarming, task_ids, timeout, None, print_status_updates,
        output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. Default to
      # failure if there was no process that even started.
      shard_exit_code = 1
      # 'key=lambda x: not x' sorts truthy (non-zero) codes first.
      shard_exit_codes = sorted(metadata['exit_codes'], key=lambda x: not x)
      if shard_exit_codes:
        shard_exit_code = shard_exit_codes[0]
      if shard_exit_code:
        exit_code = shard_exit_code

      shard_duration = sum(i for i in metadata['durations'] if i)
      total_duration += shard_duration
      if decorate:
        print(decorate_shard_output(
            swarming, index, metadata, shard_exit_code, shard_duration))
        if len(seen_shards) < len(task_ids):
          print('')
      else:
        print('%s: %s %d' % (
            metadata['bot_id'], metadata['id'], shard_exit_code))
        # Fix: this loop previously rebound the name 'output', shadowing the
        # shard result dict; distinct names keep each binding unambiguous.
        for output in metadata['outputs']:
          if not output:
            continue
          output = output.rstrip()
          if output:
            print(''.join('  %s\n' % l for l in output.splitlines()))
  finally:
    # Always finalize so partial results are flushed even on timeout/abort.
    summary = output_collector.finalize()
    if task_summary_json:
      tools.write_json(task_summary_json, summary, False)

  if decorate and total_duration:
    print('Total duration: %.1fs' % total_duration)

  if len(seen_shards) != len(task_ids):
    missing_shards = [x for x in range(len(task_ids)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000789
790
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -0500791### Commands.
792
793
def abort_task(_swarming, _manifest):
  """Given a task manifest that was triggered, aborts its execution."""
  # TODO(vadimsh): Not supported by the server yet.
797
798
def add_filter_options(parser):
  """Registers the bot dimension filtering option group on |parser|."""
  group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  group.add_option(
      '-d', '--dimension', default=[], action='append', nargs=2,
      dest='dimensions', metavar='FOO bar',
      help='dimension to filter on')
  # Kept as an attribute so commands can append their own filter options.
  parser.filter_group = group
  parser.add_option_group(group)
806
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400807
def add_sharding_options(parser):
  """Registers the --shards option group on |parser|."""
  group = tools.optparse.OptionGroup(parser, 'Sharding options')
  group.add_option(
      '--shards', type='int', default=1,
      help='Number of shards to trigger and collect.')
  parser.sharding_group = group
  parser.add_option_group(group)
814
815
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming."""
  isolateserver.add_isolate_server_options(parser)
  add_filter_options(parser)

  # Option registration order is preserved: it defines the --help layout.
  group = tools.optparse.OptionGroup(parser, 'Task properties')
  group.add_option(
      '-s', '--isolated',
      help='Hash of the .isolated to grab from the isolate server')
  group.add_option(
      '-e', '--env', default=[], action='append', nargs=2, metavar='FOO bar',
      help='Environment variables to set')
  group.add_option(
      '--priority', type='int', default=100,
      help='The lower value, the more important the task is')
  group.add_option(
      '-T', '--task-name',
      help='Display name of the task. Defaults to '
           '<base_name>/<dimensions>/<isolated hash>/<timestamp> if an '
           'isolated file is provided, if a hash is provided, it defaults to '
           '<user>/<dimensions>/<isolated hash>/<timestamp>')
  group.add_option(
      '--tags', action='append', default=[],
      help='Tags to assign to the task.')
  group.add_option(
      '--user', default='',
      help='User associated with the task. Defaults to authenticated user on '
           'the server.')
  group.add_option(
      '--idempotent', action='store_true', default=False,
      help='When set, the server will actively try to find a previous task '
           'with the same parameter and return this result instead if possible')
  group.add_option(
      '--expiration', type='int', default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  # Deprecated alias for --expiration, kept hidden from --help.
  group.add_option(
      '--deadline', type='int', dest='expiration',
      help=tools.optparse.SUPPRESS_HELP)
  group.add_option(
      '--hard-timeout', type='int', default=60*60,
      help='Seconds to allow the task to complete.')
  group.add_option(
      '--io-timeout', type='int', default=20*60,
      help='Seconds to allow the task to be silent.')
  group.add_option(
      '--raw-cmd', action='store_true', default=False,
      help='When set, the command after -- is used as-is without run_isolated. '
           'In this case, no .isolated file is expected.')
  parser.task_group = group
  parser.add_option_group(group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000866
867
def process_trigger_options(parser, options, args):
  """Processes trigger options and uploads files to isolate server if necessary.

  Returns a TaskRequest instance describing the task to trigger. Calls
  parser.error() (which exits) on invalid combinations of options.
  """
  options.dimensions = dict(options.dimensions)
  options.env = dict(options.env)

  if not options.dimensions:
    parser.error('Please at least specify one --dimension')

  bundled = []
  if options.raw_cmd:
    # The arguments after -- are the command itself; nothing is uploaded.
    if not args:
      parser.error(
          'Arguments with --raw-cmd should be passed after -- as command '
          'delimiter.')
    if options.isolate_server:
      parser.error('Can\'t use both --raw-cmd and --isolate-server.')
    command = args
    if not options.task_name:
      # Synthesize a default name out of the user and the dimensions.
      dims = '_'.join(
          '%s=%s' % (k, v)
          for k, v in sorted(options.dimensions.iteritems()))
      options.task_name = u'%s/%s' % (options.user, dims)
  else:
    isolateserver.process_isolate_server_options(parser, options, False)
    try:
      command, bundled = isolated_handle_options(options, args)
    except ValueError as e:
      parser.error(str(e))

  return TaskRequest(
      command=command,
      data=bundled,
      dimensions=options.dimensions,
      env=options.env,
      expiration=options.expiration,
      hard_timeout=options.hard_timeout,
      idempotent=options.idempotent,
      io_timeout=options.io_timeout,
      name=options.task_name,
      priority=options.priority,
      tags=options.tags,
      user=options.user,
      verbose=options.verbose)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000913
914
def add_collect_options(parser):
  """Registers the options controlling result collection on |parser|."""
  parser.server_group.add_option(
      '-t', '--timeout', type='float', default=80*60.,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')
  parser.group_logging.add_option(
      '--decorate', action='store_true', help='Decorate output')
  parser.group_logging.add_option(
      '--print-status-updates', action='store_true',
      help='Print periodic status updates')
  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  output_group.add_option(
      '--task-summary-json', metavar='FILE',
      help='Dump a summary of task results to this file as json. It contains '
           'only shards statuses as know to server directly. Any output files '
           'emitted by the task can be collected by using --task-output-dir')
  output_group.add_option(
      '--task-output-dir', metavar='DIR',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains per-shard directory with output files produced '
           'by shards: <task-output-dir>/<zero-based-shard-index>/.')
  parser.task_output_group = output_group
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000941
942
def CMDbots(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  parser.filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  parser.filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  parser.filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  # The two flags contradict each other: one shows only dead bots, the other
  # includes them alongside live ones.
  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  bots = []
  cursor = None
  limit = 250
  # Iterate via cursors.
  base_url = options.swarming + '/swarming/api/v1/client/bots?limit=%d' % limit
  while True:
    url = base_url
    if cursor:
      url += '&cursor=%s' % urllib.quote(cursor)
    data = net.url_read_json(url)
    if data is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    bots.extend(data['items'])
    cursor = data['cursor']
    # An empty cursor means the server returned the last page.
    if not cursor:
      break

  # Natural sort so e.g. 'bot2' comes before 'bot10'.
  for bot in natsort.natsorted(bots, key=lambda x: x['id']):
    # Liveness filtering: --dead-only keeps only dead bots; by default dead
    # bots are skipped unless --keep-dead was passed.
    if options.dead_only:
      if not bot['is_dead']:
        continue
    elif not options.keep_dead and bot['is_dead']:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = bot['dimensions']
    for key, value in options.dimensions:
      if key not in dimensions:
        break
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      if isinstance(dimensions[key], list):
        if value not in dimensions[key]:
          break
      else:
        if value != dimensions[key]:
          break
    else:
      # for/else: reached only when no 'break' fired above, i.e. the bot
      # matched every requested dimension.
      print bot['id']
      if not options.bare:
        print '  %s' % json.dumps(dimensions, sort_keys=True)
        if bot.get('task_id'):
          print '  task: %s' % bot['task_id']
  return 0
1007
1008
@subcommand.usage('--json file | task_id...')
def CMDcollect(parser, args):
  """Retrieves results of one or multiple Swarming task by its ID.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  parser.add_option(
      '-j', '--json',
      help='Load the task ids from .json as saved by trigger --dump-json')
  (options, args) = parser.parse_args(args)
  # Task ids and --json are mutually exclusive sources; exactly one required.
  if not args and not options.json:
    parser.error('Must specify at least one task id or --json.')
  if args and options.json:
    parser.error('Only use one of task id or --json.')

  if options.json:
    try:
      with open(options.json) as f:
        # Restore shard order as recorded by 'trigger --dump-json'.
        tasks = sorted(
            json.load(f)['tasks'].itervalues(), key=lambda x: x['shard_index'])
        args = [t['task_id'] for t in tasks]
    except (KeyError, IOError, ValueError):
      parser.error('Failed to parse %s' % options.json)
  else:
    # Task ids given on the command line must be lowercase hex.
    valid = frozenset('0123456789abcdef')
    if any(not valid.issuperset(task_id) for task_id in args):
      parser.error('Task ids are 0-9a-f.')

  try:
    # task_name is None: outputs are collected under the ids only.
    return collect(
        options.swarming,
        None,
        args,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
1052
1053
@subcommand.usage('[resource name]')
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'list' to gather the
  list of valid values from the server.

  Examples:
    Printing the list of known URLs:
      swarming.py query -S https://server-url list

    Listing last 50 tasks on a specific bot named 'swarm1'
      swarming.py query -S https://server-url --limit 50 bot/swarm1/tasks
  """
  # Server page size; --limit is enforced client-side across pages.
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  parser.add_option(
      '--json', help='Path to JSON output file (otherwise prints to stdout)')
  (options, args) = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify only one resource name.')

  base_url = options.swarming + '/swarming/api/v1/client/' + args[0]
  # The resource name may already carry a query string; pick the separator
  # accordingly. Computed once since base_url never changes.
  merge_char = '&' if '?' in base_url else '?'
  url = base_url
  if options.limit:
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    # Fix: previously hardcoded '?', which produced an invalid URL whenever
    # base_url already contained a query string.
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    new = net.url_read_json(url)
    if new is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new['items'])
    data['cursor'] = new['cursor']

  # Trim client-side in case the last page overshot the requested limit.
  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
    data.pop('cursor', None)

  if options.json:
    with open(options.json, 'w') as f:
      json.dump(data, f)
  else:
    json.dump(data, sys.stdout, indent=2, sort_keys=True)
    sys.stdout.write('\n')
  return 0
1115
1116
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
  except Failure as e:
    # NOTE(review): args[0] raises IndexError when args is empty (e.g. only
    # --isolated <hash> was passed) — confirm trigger failures imply args.
    on_error.report(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, args[0], e.args[0]))
    return 1
  if not tasks:
    on_error.report('Failed to trigger the task.')
    return 1
  print('Triggered task: %s' % options.task_name)
  # Collect shard results in shard_index order.
  task_ids = [
    t['task_id']
    for t in sorted(tasks.itervalues(), key=lambda x: x['shard_index'])
  ]
  try:
    return collect(
        options.swarming,
        options.task_name,
        task_ids,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001157
1158
@subcommand.usage('task_id')
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that have been run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  # Fetch the original request description for the task.
  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  # Download and extract every data bundle referenced by the task.
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    # Data hosted elsewhere than the swarming server needs its own auth.
    if data_host != swarming_host:
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  env = None
  if properties['env']:
    # Overlay the task's environment on top of the current one.
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  # Run each command in order; remember the first non-zero exit code but
  # keep executing the remaining commands, as the bot would.
  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    if not exit_code:
      exit_code = c
  return exit_code
1214
1215
@subcommand.usage("(hash|isolated) [-- extra_args|raw command]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  task_request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(
        options.swarming, task_request, options.shards)
    if tasks:
      print('Triggered task: %s' % options.task_name)
      # Present shards in shard_index order.
      tasks_sorted = sorted(
          tasks.itervalues(), key=lambda x: x['shard_index'])
      if options.dump_json:
        data = {
          'base_task_name': options.task_name,
          'tasks': tasks,
        }
        tools.write_json(options.dump_json, data, True)
        print('To collect results, use:')
        print('  swarming.py collect -S %s --json %s' %
            (options.swarming, options.dump_json))
      else:
        print('To collect results, use:')
        print('  swarming.py collect -S %s %s' %
            (options.swarming, ' '.join(t['task_id'] for t in tasks_sorted)))
      print('Or visit:')
      for t in tasks_sorted:
        print('  ' + t['view_url'])
    # 0 when at least one shard was triggered, 1 otherwise.
    return int(not tasks)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001263
1264
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser shared by all swarming.py subcommands.

  Adds the -S/--swarming server option and the authentication options on top
  of the logging options provided by the base class.
  """

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    self.server_group = tools.optparse.OptionGroup(self, 'Server')
    self.server_group.add_option(
        '-S', '--swarming',
        metavar='URL', default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.add_option_group(self.server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    # Processes auth first, then --swarming; the identity returned by the
    # server is used as the default for --user when that option exists.
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    user = self._process_swarming(options)
    if hasattr(options, 'user') and not options.user:
      options.user = user
    return options, args

  def _process_swarming(self, options):
    """Processes the --swarming option and aborts if not specified.

    Returns the identity as determined by the server.
    """
    if not options.swarming:
      self.error('--swarming is required.')
    try:
      options.swarming = net.fix_url(options.swarming)
    except ValueError as e:
      self.error('--swarming %s' % e)
    # Register the server so uncaught exceptions get reported to it on exit.
    on_error.report_on_exception_exit(options.swarming)
    try:
      user = auth.ensure_logged_in(options.swarming)
    except ValueError as e:
      self.error(str(e))
    return user
maruel@chromium.org0437a732013-08-27 16:05:52 +00001303
1304
def main(args):
  """Dispatches |args| to the matching CMD* subcommand handler."""
  parser = OptionParserSwarming(version=__version__)
  return subcommand.CommandDispatcher(__name__).execute(parser, args)
maruel@chromium.org0437a732013-08-27 16:05:52 +00001308
1309
if __name__ == '__main__':
  # Normalize stream encoding/buffering and enable ANSI colors on Windows
  # before dispatching to the subcommand.
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))