blob: 98af89a95a6add6405d5fbeb9612f2e62b4bc73d [file] [log] [blame]
maruel@chromium.org0437a732013-08-27 16:05:52 +00001#!/usr/bin/env python
Marc-Antoine Ruel8add1242013-11-05 17:28:27 -05002# Copyright 2013 The Swarming Authors. All rights reserved.
Marc-Antoine Ruele98b1122013-11-05 20:27:57 -05003# Use of this source code is governed under the Apache License, Version 2.0 that
4# can be found in the LICENSE file.
maruel@chromium.org0437a732013-08-27 16:05:52 +00005
6"""Client tool to trigger tasks or retrieve results from a Swarming server."""
7
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -05008__version__ = '0.6'
maruel@chromium.org0437a732013-08-27 16:05:52 +00009
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050010import collections
maruel@chromium.org0437a732013-08-27 16:05:52 +000011import json
12import logging
13import os
Vadim Shtayurae3fbd102014-04-29 17:05:21 -070014import re
maruel@chromium.org0437a732013-08-27 16:05:52 +000015import shutil
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040016import StringIO
maruel@chromium.org0437a732013-08-27 16:05:52 +000017import subprocess
18import sys
Vadim Shtayurab19319e2014-04-27 08:50:06 -070019import threading
maruel@chromium.org0437a732013-08-27 16:05:52 +000020import time
21import urllib
Marc-Antoine Ruel13a81272014-10-07 20:16:43 -040022import urlparse
23import zipfile
maruel@chromium.org0437a732013-08-27 16:05:52 +000024
25from third_party import colorama
26from third_party.depot_tools import fix_encoding
27from third_party.depot_tools import subcommand
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000028
Marc-Antoine Ruel8806e622014-02-12 14:15:53 -050029from utils import file_path
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -040030from third_party.chromium import natsort
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000031from utils import net
Marc-Antoine Ruelcfb60852014-07-02 15:22:00 -040032from utils import on_error
maruel@chromium.org0437a732013-08-27 16:05:52 +000033from utils import threading_utils
vadimsh@chromium.org6b706212013-08-28 15:03:46 +000034from utils import tools
35from utils import zip_package
maruel@chromium.org0437a732013-08-27 16:05:52 +000036
Vadim Shtayurae34e13a2014-02-02 11:23:26 -080037import auth
Marc-Antoine Ruel8bee66d2014-08-28 19:02:07 -040038import isolated_format
maruel@chromium.org7b844a62013-09-17 13:04:59 +000039import isolateserver
maruel@chromium.org0437a732013-08-27 16:05:52 +000040import run_isolated
41
42
43ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -050044
45
class Failure(Exception):
  """Generic failure raised by this script's helpers."""
49
50
51### Isolated file handling.
52
53
def isolated_upload_zip_bundle(isolate_server, bundle):
  """Uploads a zip package to Isolate Server and returns raw fetch URL.

  Args:
    isolate_server: URL of an Isolate Server.
    bundle: instance of ZipPackage to upload.

  Returns:
    URL to get the file from.
  """
  # Swarming bot needs to be able to grab the file from the Isolate Server using
  # a simple HTTPS GET. Use 'default' namespace so that the raw data returned to
  # a bot is not zipped, since the swarming_bot doesn't understand compressed
  # data. This namespace has nothing to do with |namespace| passed to
  # run_isolated.py that is used to store files for isolated task.
  logging.info('Zipping up and uploading files...')
  start_time = time.time()
  isolate_item = isolateserver.BufferItem(bundle.zip_into_buffer())
  with isolateserver.get_storage(isolate_server, 'default') as storage:
    # upload_items() returns the subset of items that actually needed to be
    # uploaded; if the zip was already on the server the list is empty.
    uploaded = storage.upload_items([isolate_item])
    bundle_url = storage.get_fetch_url(isolate_item)
  elapsed = time.time() - start_time
  if isolate_item in uploaded:
    logging.info('Upload complete, time elapsed: %f', elapsed)
  else:
    logging.info('Zip file already on server, time elapsed: %f', elapsed)
  return bundle_url
81
82
def isolated_get_data(isolate_server):
  """Returns the 'data' section with all files necessary to bootstrap a task
  execution running an isolated task.

  It's mainly zipping run_isolated.zip over and over again.
  TODO(maruel): Get rid of this with.
  https://code.google.com/p/swarming/issues/detail?id=173

  Returns:
    List of (url, filename) pairs; the bot fetches each url and saves it under
    the given filename before starting the task.
  """
  # Bundle this script's directory plus a self-contained run_isolated.zip so
  # the bot can execute the isolated task without a source checkout.
  bundle = zip_package.ZipPackage(ROOT_DIR)
  bundle.add_buffer(
      'run_isolated.zip',
      run_isolated.get_as_zip_package().zip_into_buffer(compress=False))
  bundle_url = isolated_upload_zip_bundle(isolate_server, bundle)
  return [(bundle_url, 'swarm_data.zip')]
97
98
def isolated_get_run_commands(
    isolate_server, namespace, isolated_hash, extra_args, verbose):
  """Returns the 'commands' to run an isolated task via run_isolated.zip.

  Returns:
    commands list to be added to the request.
  """
  cmd = ['python', 'run_isolated.zip']
  cmd.extend(['--hash', isolated_hash])
  cmd.extend(['--isolate-server', isolate_server])
  cmd.extend(['--namespace', namespace])
  if verbose:
    cmd.append('--verbose')
  # Everything after '--' is forwarded by run_isolated.py to the task command.
  if extra_args:
    cmd.append('--')
    cmd.extend(extra_args)
  return cmd
119
120
def isolated_archive(isolate_server, namespace, isolated, algo, verbose):
  """Archives a .isolated and all the dependencies on the Isolate Server.

  Args:
    isolate_server: URL of the Isolate Server to upload to.
    namespace: namespace to use on the Isolate Server.
    isolated: path to the .isolated file to archive.
    algo: hashing algorithm object used to hash the .isolated file.
    verbose: verbosity level; each level adds one '--verbose' flag.

  Returns:
    Hash of the uploaded .isolated file, or None if archival failed.
  """
  logging.info(
      'isolated_archive(%s, %s, %s)', isolate_server, namespace, isolated)
  print('Archiving: %s' % isolated)
  cmd = [
    sys.executable,
    os.path.join(ROOT_DIR, 'isolate.py'),
    'archive',
    '--isolate-server', isolate_server,
    '--namespace', namespace,
    '--isolated', isolated,
  ]
  cmd.extend(['--verbose'] * verbose)
  logging.info(' '.join(cmd))
  # Bug fix: 'verbose' used to be passed as the second positional argument to
  # subprocess.call(), where it was silently interpreted as 'bufsize'.
  if subprocess.call(cmd):
    return None
  return isolated_format.hash_file(isolated, algo)
139
140
def isolated_to_hash(isolate_server, namespace, arg, algo, verbose):
  """Archives a .isolated file if needed.

  Returns the file hash to trigger and a bool specifying if it was a file (True)
  or a hash (False).
  """
  is_file = arg.endswith('.isolated')
  if is_file:
    # A path was given: archive it first, then trigger by the resulting hash.
    file_hash = isolated_archive(isolate_server, namespace, arg, algo, verbose)
    if not file_hash:
      on_error.report('Archival failure %s' % arg)
      return None, True
    return file_hash, True
  if isolated_format.is_valid_hash(arg, algo):
    return arg, False
  on_error.report('Invalid hash %s' % arg)
  return None, False
158
159
def isolated_handle_options(options, args):
  """Handles isolated arguments.

  Args:
    options: parsed optparse options; reads isolate_server, namespace, verbose,
        user and dimensions, and may set task_name if it was empty.
    args: positional arguments; one .isolated path or hash, optionally followed
        by '--' and extra arguments for the task's command.

  Returns:
    tuple(command, data).

  Raises:
    ValueError: if args are malformed or the bootstrap zip upload fails.
  """
  # Split |args| into the single isolated target and the pass-through args.
  isolated_cmd_args = []
  if '--' in args:
    index = args.index('--')
    isolated_cmd_args = args[index+1:]
    args = args[:index]
  else:
    # optparse eats '--' sometimes.
    isolated_cmd_args = args[1:]
    args = args[:1]
  if len(args) != 1:
    raise ValueError('Must pass one .isolated file or its hash (sha1).')

  isolated_hash, is_file = isolated_to_hash(
      options.isolate_server, options.namespace, args[0],
      isolated_format.get_hash_algo(options.namespace), options.verbose)
  if not isolated_hash:
    raise ValueError('Invalid argument %s' % args[0])

  command = isolated_get_run_commands(
      options.isolate_server, options.namespace, isolated_hash,
      isolated_cmd_args, options.verbose)

  # If a file name was passed, use its base name of the isolated hash.
  # Otherwise, use user name as an approximation of a task name.
  if not options.task_name:
    if is_file:
      key = os.path.splitext(os.path.basename(args[0]))[0]
    else:
      key = options.user
    # Task name is '<key>/<sorted dimensions>/<hash>'.
    options.task_name = '%s/%s/%s' % (
        key,
        '_'.join(
            '%s=%s' % (k, v)
            for k, v in sorted(options.dimensions.iteritems())),
        isolated_hash)

  try:
    data = isolated_get_data(options.isolate_server)
  except (IOError, OSError):
    on_error.report('Failed to upload the zip file')
    raise ValueError('Failed to upload the zip file')

  return command, data
209
210
211### Triggering.
212
213
# Description of a single Swarming task request, as consumed by
# task_request_to_raw_request() below. Field order is part of the tuple API.
TaskRequest = collections.namedtuple(
    'TaskRequest',
    [
      'command', 'data', 'dimensions', 'env', 'expiration', 'hard_timeout',
      'idempotent', 'io_timeout', 'name', 'priority', 'tags', 'user',
      'verbose',
    ])
231
232
def task_request_to_raw_request(task_request):
  """Returns the json dict expected by the Swarming server for new request.

  This is for the v1 client Swarming API.
  """
  # 'properties' describes what to run; the outer dict describes scheduling.
  properties = {
    'commands': [task_request.command],
    'data': task_request.data,
    'dimensions': task_request.dimensions,
    'env': task_request.env,
    'execution_timeout_secs': task_request.hard_timeout,
    'io_timeout_secs': task_request.io_timeout,
    'idempotent': task_request.idempotent,
  }
  return {
    'name': task_request.name,
    'priority': task_request.priority,
    'properties': properties,
    'scheduling_expiration_secs': task_request.expiration,
    'tags': task_request.tags,
    'user': task_request.user,
  }
254
255
def swarming_handshake(swarming):
  """Initiates the connection to the Swarming server.

  Returns:
    XSRF token string to use on subsequent requests, or None on failure.
  """
  response = net.url_read_json(
      swarming + '/swarming/api/v1/client/handshake',
      headers={'X-XSRF-Token-Request': '1'},
      data={})
  if not response:
    logging.error('Failed to handshake with server')
    return None
  logging.info('Connected to server version: %s', response['server_version'])
  return response['xsrf_token']
268
269
def swarming_trigger(swarming, raw_request, xsrf_token):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])
  result = net.url_read_json(
      swarming + '/swarming/api/v1/client/request',
      data=raw_request,
      headers={'X-XSRF-Token': xsrf_token})
  if result:
    return result
  on_error.report('Failed to trigger task %s' % raw_request['name'])
  return None
295
296
def setup_googletest(env, shards, index):
  """Sets googletest specific environment variables.

  Returns a copy of |env| with GTEST_SHARD_INDEX/GTEST_TOTAL_SHARDS set when
  sharding is in effect; returns |env| unchanged otherwise.
  """
  if shards <= 1:
    return env
  sharded = env.copy()
  sharded['GTEST_SHARD_INDEX'] = str(index)
  sharded['GTEST_TOTAL_SHARDS'] = str(shards)
  return sharded
304
305
def trigger_task_shards(swarming, task_request, shards):
  """Triggers one or many subtasks of a sharded task.

  Args:
    swarming: URL of the Swarming server.
    task_request: TaskRequest to trigger; sharded copies are derived from it.
    shards: number of shards to trigger.

  Returns:
    Dict with task details, returned to caller as part of --dump-json output.
    None in case of failure.
  """
  def convert(index):
    # For multi-shard tasks, each shard gets googletest env vars and a
    # 'name:index:shards' suffix on the task name.
    req = task_request
    if shards > 1:
      req = req._replace(
          env=setup_googletest(req.env, shards, index),
          name='%s:%s:%s' % (req.name, index, shards))
    return task_request_to_raw_request(req)

  requests = [convert(index) for index in xrange(shards)]
  xsrf_token = swarming_handshake(swarming)
  if not xsrf_token:
    return None
  tasks = {}
  priority_warning = False
  for index, request in enumerate(requests):
    task = swarming_trigger(swarming, request, xsrf_token)
    if not task:
      # Stop triggering on the first failure; partial set is aborted below.
      break
    logging.info('Request result: %s', task)
    # The server may lower the requested priority; warn only once.
    if (not priority_warning and
        task['request']['priority'] != task_request.priority):
      priority_warning = True
      print >> sys.stderr, (
          'Priority was reset to %s' % task['request']['priority'])
    tasks[request['name']] = {
      'shard_index': index,
      'task_id': task['task_id'],
      'view_url': '%s/user/task/%s' % (swarming, task['task_id']),
    }

  # Some shards weren't triggered. Abort everything.
  if len(tasks) != len(requests):
    if tasks:
      print >> sys.stderr, 'Only %d shard(s) out of %d were triggered' % (
          len(tasks), len(requests))
      for task_dict in tasks.itervalues():
        abort_task(swarming, task_dict['task_id'])
    return None

  return tasks
353
354
355### Collection.
maruel@chromium.org0437a732013-08-27 16:05:52 +0000356
357
# How often to print status updates to stdout in 'collect', in seconds.
STATUS_UPDATE_INTERVAL = 15 * 60.
360
Marc-Antoine Ruel2f6581a2014-10-03 11:09:53 -0400361
class State(object):
  """States in which a task can be.

  WARNING: Copy-pasted from appengine/swarming/server/task_result.py. These
  values are part of the API so if they change, the API changed.

  It's in fact an enum. Values should be in decreasing order of importance.
  """
  RUNNING = 0x10
  PENDING = 0x20
  EXPIRED = 0x30
  TIMED_OUT = 0x40
  BOT_DIED = 0x50
  CANCELED = 0x60
  COMPLETED = 0x70

  # Useful groupings of the states above.
  STATES = (RUNNING, PENDING, EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_RUNNING = (RUNNING, PENDING)
  STATES_NOT_RUNNING = (EXPIRED, TIMED_OUT, BOT_DIED, CANCELED, COMPLETED)
  STATES_DONE = (TIMED_OUT, COMPLETED)
  STATES_ABANDONED = (EXPIRED, BOT_DIED, CANCELED)

  # Maps each state value to its human readable name.
  _NAMES = {
    RUNNING: 'Running',
    PENDING: 'Pending',
    EXPIRED: 'Expired',
    TIMED_OUT: 'Execution timed out',
    BOT_DIED: 'Bot died',
    CANCELED: 'User canceled',
    COMPLETED: 'Completed',
  }

  @classmethod
  def to_string(cls, state):
    """Returns a user-readable string representing a State."""
    try:
      return cls._NAMES[state]
    except KeyError:
      raise ValueError('Invalid state %s' % state)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000400
401
class TaskOutputCollector(object):
  """Assembles task execution summary (for --task-summary-json output).

  Optionally fetches task outputs from isolate server to local disk (used when
  --task-output-dir is passed).

  This object is shared among multiple threads running 'retrieve_results'
  function, in particular they call 'process_shard_result' method in parallel.
  """

  def __init__(self, task_output_dir, task_name, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: (optional) local directory to put fetched files to.
      task_name: name of the swarming task results belong to.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = task_output_dir
    self.task_name = task_name
    self.shard_count = shard_count

    # Protects _per_shard_results and _storage below.
    self._lock = threading.Lock()
    # Maps shard index -> result dict, filled by process_shard_result().
    self._per_shard_results = {}
    # Lazily created isolateserver.Storage, shared by all shards.
    self._storage = None

    if self.task_output_dir and not os.path.isdir(self.task_output_dir):
      os.makedirs(self.task_output_dir)

  def process_shard_result(self, shard_index, result):
    """Stores results of a single task shard, fetches output files if necessary.

    Modifies |result| in place.

    Called concurrently from multiple threads.
    """
    # Sanity check index is in expected range.
    assert isinstance(shard_index, int)
    if shard_index < 0 or shard_index >= self.shard_count:
      logging.warning(
          'Shard index %d is outside of expected range: [0; %d]',
          shard_index, self.shard_count - 1)
      return

    # Derive 'isolated_out' from the task's text outputs; at most one output
    # is expected to embed a run_isolated_out_hack section.
    assert not 'isolated_out' in result
    result['isolated_out'] = None
    for output in result['outputs']:
      isolated_files_location = extract_output_files_location(output)
      if isolated_files_location:
        if result['isolated_out']:
          raise ValueError('Unexpected two task with output')
        result['isolated_out'] = isolated_files_location

    # Store result dict of that shard, ignore results we've already seen.
    with self._lock:
      if shard_index in self._per_shard_results:
        logging.warning('Ignoring duplicate shard index %d', shard_index)
        return
      self._per_shard_results[shard_index] = result

    # Fetch output files if necessary.
    if self.task_output_dir and result['isolated_out']:
      storage = self._get_storage(
          result['isolated_out']['server'],
          result['isolated_out']['namespace'])
      if storage:
        # Output files are supposed to be small and they are not reused across
        # tasks. So use MemoryCache for them instead of on-disk cache. Make
        # files writable, so that calling script can delete them.
        isolateserver.fetch_isolated(
            result['isolated_out']['hash'],
            storage,
            isolateserver.MemoryCache(file_mode_mask=0700),
            os.path.join(self.task_output_dir, str(shard_index)),
            False)

  def finalize(self):
    """Assembles and returns task summary JSON, shutdowns underlying Storage."""
    with self._lock:
      # Write an array of shard results with None for missing shards.
      summary = {
        'shards': [
          self._per_shard_results.get(i) for i in xrange(self.shard_count)
        ],
      }
      # Write summary.json to task_output_dir as well.
      if self.task_output_dir:
        tools.write_json(
            os.path.join(self.task_output_dir, 'summary.json'),
            summary,
            False)
      if self._storage:
        self._storage.close()
        self._storage = None
      return summary

  def _get_storage(self, isolate_server, namespace):
    """Returns isolateserver.Storage to use to fetch files.

    Returns None (and logs an error) if shards disagree on the isolate server
    or namespace.
    """
    assert self.task_output_dir
    with self._lock:
      if not self._storage:
        self._storage = isolateserver.get_storage(isolate_server, namespace)
      else:
        # Shards must all use exact same isolate server and namespace.
        if self._storage.location != isolate_server:
          logging.error(
              'Task shards are using multiple isolate servers: %s and %s',
              self._storage.location, isolate_server)
          return None
        if self._storage.namespace != namespace:
          logging.error(
              'Task shards are using multiple namespaces: %s and %s',
              self._storage.namespace, namespace)
          return None
      return self._storage
517
518
def extract_output_files_location(task_log):
  """Task log -> location of task output files to fetch.

  TODO(vadimsh,maruel): Use side-channel to get this information.
  See 'run_tha_test' in run_isolated.py for where the data is generated.

  Returns:
    Tuple (isolate server URL, namespace, isolated hash) on success.
    None if information is missing or can not be parsed.
  """
  if not task_log:
    return None
  # The bot embeds a JSON blob between these markers in the task's stdout.
  match = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      task_log,
      re.DOTALL)
  if not match:
    return None

  def to_ascii(val):
    # Rejects non-string values; raises on non-ascii content as well.
    if not isinstance(val, basestring):
      raise ValueError()
    return val.encode('ascii')

  try:
    data = json.loads(match.group(1))
    if not isinstance(data, dict):
      raise ValueError()
    isolated_hash = to_ascii(data['hash'])
    namespace = to_ascii(data['namespace'])
    isolate_server = to_ascii(data['storage'])
    if not file_path.is_url(isolate_server):
      raise ValueError()
    data = {
      'hash': isolated_hash,
      'namespace': namespace,
      'server': isolate_server,
      'view_url': '%s/browse?%s' % (isolate_server, urllib.urlencode(
          [('namespace', namespace), ('hash', isolated_hash)])),
    }
    return data
  except (KeyError, ValueError):
    # Malformed or incomplete blob; treat as if no output files were produced.
    logging.warning(
        'Unexpected value of run_isolated_out_hack: %s', match.group(1))
    return None
564
565
def now():
  """Returns the current wall-clock time; exists so it can be mocked easily."""
  return time.time()
569
570
def retrieve_results(
    base_url, shard_index, task_id, timeout, should_stop, output_collector):
  """Retrieves results for a single task ID.

  Polls the server until the task reaches a non-running state, |timeout|
  expires, or |should_stop| is set (in which case None is returned).

  Args:
    base_url: URL of the Swarming server.
    shard_index: index of this shard, passed through to output_collector.
    timeout: max seconds to wait; falsy means wait forever.
    should_stop: threading.Event used to abort the wait early.
    output_collector: optional TaskOutputCollector to record the result into.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  result_url = '%s/swarming/api/v1/client/task/%s' % (base_url, task_id)
  output_url = '%s/swarming/api/v1/client/task/%s/output/all' % (
      base_url, task_id)
  started = now()
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read_json, since we are doing retries
    # ourselves.
    # TODO(maruel): We'd need to know if it's a 404 and not retry at all.
    result = net.url_read_json(result_url, retry_50x=False)
    if not result:
      continue
    if result['state'] in State.STATES_NOT_RUNNING:
      # Task is done; fetch its console outputs and attach them to the result.
      out = net.url_read_json(output_url)
      result['outputs'] = (out or {}).get('outputs', [])
      if not result['outputs']:
        logging.error('No output found for task %s', task_id)
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
maruel@chromium.org0437a732013-08-27 16:05:52 +0000625
626
def yield_results(
    swarm_base_url, task_ids, timeout, max_threads, print_status_updates,
    output_collector):
  """Yields swarming task results from the swarming server as (index, result).

  Duplicate shards are ignored. Shards are yielded in order of completion.
  Timed out shards are NOT yielded at all. Caller can compare number of yielded
  shards with len(task_keys) to verify all shards completed.

  max_threads is optional and is used to limit the number of parallel fetches
  done. Since in general the number of task_keys is in the range <=10, it's not
  worth normally to limit the number threads. Mostly used for testing purposes.

  output_collector is an optional instance of TaskOutputCollector that will be
  used to fetch files produced by a task from isolate server to the local disk.

  Yields:
    (index, result). In particular, 'result' is defined as the
    GetRunnerResults() function in services/swarming/server/test_runner.py.
  """
  number_threads = (
      min(max_threads, len(task_ids)) if max_threads else len(task_ids))
  should_stop = threading.Event()
  results_channel = threading_utils.TaskChannel()

  with threading_utils.ThreadPool(number_threads, number_threads, 0) as pool:
    try:
      # Adds a task to the thread pool to call 'retrieve_results' and return
      # the results together with shard_index that produced them (as a tuple).
      def enqueue_retrieve_results(shard_index, task_id):
        task_fn = lambda *args: (shard_index, retrieve_results(*args))
        pool.add_task(
            0, results_channel.wrap_task(task_fn), swarm_base_url, shard_index,
            task_id, timeout, should_stop, output_collector)

      # Enqueue 'retrieve_results' calls for each shard key to run in parallel.
      for shard_index, task_id in enumerate(task_ids):
        enqueue_retrieve_results(shard_index, task_id)

      # Wait for all of them to finish.
      shards_remaining = range(len(task_ids))
      active_task_count = len(task_ids)
      while active_task_count:
        shard_index, result = None, None
        try:
          # Blocks up to STATUS_UPDATE_INTERVAL; a Timeout only triggers a
          # status print, it does not count as a finished task.
          shard_index, result = results_channel.pull(
              timeout=STATUS_UPDATE_INTERVAL)
        except threading_utils.TaskChannel.Timeout:
          if print_status_updates:
            print(
                'Waiting for results from the following shards: %s' %
                ', '.join(map(str, shards_remaining)))
            sys.stdout.flush()
          continue
        except Exception:
          logging.exception('Unexpected exception in retrieve_results')

        # A call to 'retrieve_results' finished (successfully or not).
        active_task_count -= 1
        if not result:
          logging.error('Failed to retrieve the results for a swarming key')
          continue

        # Yield back results to the caller.
        assert shard_index in shards_remaining
        shards_remaining.remove(shard_index)
        yield shard_index, result

    finally:
      # Done or aborted with Ctrl+C, kill the remaining threads.
      should_stop.set()
698
699
def decorate_shard_output(
    swarming, shard_index, result, shard_exit_code, shard_duration):
  """Returns wrapped output for swarming task shard."""
  url = '%s/user/task/%s' % (swarming, result['id'])
  header_text = 'Shard %d %s' % (shard_index, url)
  footer_text = 'End of shard %d Duration: %.1fs Bot: %s Exit code %s' % (
      shard_index, shard_duration, result['bot_id'], shard_exit_code)

  # Frame the task output in an ascii box sized to the longer caption.
  box_width = max(len(header_text), len(footer_text))
  dash_pad = '+-%s-+\n' % ('-' * box_width)
  header_line = '| %s |\n' % header_text.ljust(box_width)
  footer_line = '| %s |\n' % footer_text.ljust(box_width)

  body = '\n'.join(o for o in result['outputs'] if o).rstrip() + '\n'
  return (
      dash_pad + header_line + dash_pad +
      body +
      dash_pad + footer_line + dash_pad[:-1])
maruel@chromium.org0437a732013-08-27 16:05:52 +0000717
718
def collect(
    swarming, task_name, task_ids, timeout, decorate, print_status_updates,
    task_summary_json, task_output_dir):
  """Retrieves results of a Swarming task.

  Returns:
    Process exit code: first non-zero shard exit code, or 1 if any shard is
    missing, or 0 on success.
  """
  # Collect summary JSON and output files (if task_output_dir is not None).
  output_collector = TaskOutputCollector(
      task_output_dir, task_name, len(task_ids))

  seen_shards = set()
  exit_code = 0
  total_duration = 0
  try:
    for index, output in yield_results(
        swarming, task_ids, timeout, None, print_status_updates,
        output_collector):
      seen_shards.add(index)

      # Grab first non-zero exit code as an overall shard exit code. Default to
      # failure if there was no process that even started.
      shard_exit_code = 1
      shard_exit_codes = sorted(output['exit_codes'], key=lambda x: not x)
      if shard_exit_codes:
        shard_exit_code = shard_exit_codes[0]
      if shard_exit_code:
        exit_code = shard_exit_code

      shard_duration = sum(i for i in output['durations'] if i)
      total_duration += shard_duration
      if decorate:
        print(decorate_shard_output(
            swarming, index, output, shard_exit_code, shard_duration))
        if len(seen_shards) < len(task_ids):
          print('')
      else:
        print('%s: %s %d' % (output['bot_id'], output['id'], shard_exit_code))
        # NOTE: 'output' is deliberately reused as the loop variable here.
        for output in output['outputs']:
          if not output:
            continue
          output = output.rstrip()
          if output:
            print(''.join(' %s\n' % l for l in output.splitlines()))
  finally:
    # Always flush the summary, even on Ctrl+C or a retrieval failure.
    summary = output_collector.finalize()
    if task_summary_json:
      tools.write_json(task_summary_json, summary, False)

  if decorate and total_duration:
    print('Total duration: %.1fs' % total_duration)

  if len(seen_shards) != len(task_ids):
    missing_shards = [x for x in range(len(task_ids)) if x not in seen_shards]
    print >> sys.stderr, ('Results from some shards are missing: %s' %
        ', '.join(map(str, missing_shards)))
    return 1

  return exit_code
maruel@chromium.org0437a732013-08-27 16:05:52 +0000775
776
Marc-Antoine Ruelefdc5282014-12-12 19:31:00 -0500777### Commands.
778
779
def abort_task(_swarming, _manifest):
  """Given a task manifest that was triggered, aborts its execution."""
  # TODO(vadimsh): Not supported by the server yet.
783
784
def add_filter_options(parser):
  """Adds the bot dimension filtering options to |parser|."""
  filter_group = tools.optparse.OptionGroup(parser, 'Filtering slaves')
  filter_group.add_option(
      '-d', '--dimension', default=[], action='append', nargs=2,
      dest='dimensions', metavar='FOO bar',
      help='dimension to filter on')
  parser.filter_group = filter_group
  parser.add_option_group(filter_group)
792
Marc-Antoine Ruel819fb162014-03-12 16:38:26 -0400793
def add_sharding_options(parser):
  """Adds the --shards option used by 'trigger' and 'run'."""
  sharding_group = tools.optparse.OptionGroup(parser, 'Sharding options')
  sharding_group.add_option(
      '--shards', type='int', default=1,
      help='Number of shards to trigger and collect.')
  parser.sharding_group = sharding_group
  parser.add_option_group(sharding_group)
800
801
def add_trigger_options(parser):
  """Adds all options to trigger a task on Swarming."""
  isolateserver.add_isolate_server_options(parser)
  add_filter_options(parser)

  # All the task properties are grouped together in the --help output.
  group = tools.optparse.OptionGroup(parser, 'Task properties')
  group.add_option(
      '-e', '--env', default=[], action='append', nargs=2, metavar='FOO bar',
      help='Environment variables to set')
  group.add_option(
      '--priority', type='int', default=100,
      help='The lower value, the more important the task is')
  group.add_option(
      '-T', '--task-name',
      help='Display name of the task. Defaults to '
           '<base_name>/<dimensions>/<isolated hash>/<timestamp> if an '
           'isolated file is provided, if a hash is provided, it defaults to '
           '<user>/<dimensions>/<isolated hash>/<timestamp>')
  group.add_option(
      '--tags', action='append', default=[],
      help='Tags to assign to the task.')
  group.add_option(
      '--user', default='',
      help='User associated with the task. Defaults to authenticated user on '
           'the server.')
  group.add_option(
      '--idempotent', action='store_true', default=False,
      help='When set, the server will actively try to find a previous task '
           'with the same parameter and return this result instead if possible')
  group.add_option(
      '--expiration', type='int', default=6*60*60,
      help='Seconds to allow the task to be pending for a bot to run before '
           'this task request expires.')
  # Deprecated alias of --expiration, kept hidden for compatibility.
  group.add_option(
      '--deadline', type='int', dest='expiration',
      help=tools.optparse.SUPPRESS_HELP)
  group.add_option(
      '--hard-timeout', type='int', default=60*60,
      help='Seconds to allow the task to complete.')
  group.add_option(
      '--io-timeout', type='int', default=20*60,
      help='Seconds to allow the task to be silent.')
  group.add_option(
      '--raw-cmd', action='store_true', default=False,
      help='When set, the command after -- is used as-is without run_isolated. '
           'In this case, no .isolated file is expected.')
  parser.task_group = group
  parser.add_option_group(group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000849
850
def process_trigger_options(parser, options, args):
  """Processes trigger options and uploads files to isolate server if necessary.

  Returns a TaskRequest describing the task to trigger. Calls parser.error()
  (which exits the process) on invalid option combinations.
  """
  options.dimensions = dict(options.dimensions)
  options.env = dict(options.env)
  if not options.dimensions:
    parser.error('Please at least specify one --dimension')

  data = []
  if options.raw_cmd:
    # The arguments after -- are the command to run, used verbatim.
    if not args:
      parser.error(
          'Arguments with --raw-cmd should be passed after -- as command '
          'delimiter.')
    if options.isolate_server:
      parser.error('Can\'t use both --raw-cmd and --isolate-server.')
    command = args
    if not options.task_name:
      # Synthesize a task name out of the user and the flattened dimensions.
      flat_dimensions = '_'.join(
          '%s=%s' % (k, v)
          for k, v in sorted(options.dimensions.iteritems()))
      options.task_name = '%s/%s' % (options.user, flat_dimensions)
  else:
    # An .isolated file or its hash was given; it may need to be archived.
    isolateserver.process_isolate_server_options(parser, options, False)
    try:
      command, data = isolated_handle_options(options, args)
    except ValueError as e:
      parser.error(str(e))

  return TaskRequest(
      command=command,
      data=data,
      dimensions=options.dimensions,
      env=options.env,
      expiration=options.expiration,
      hard_timeout=options.hard_timeout,
      idempotent=options.idempotent,
      io_timeout=options.io_timeout,
      name=options.task_name,
      priority=options.priority,
      tags=options.tags,
      user=options.user,
      verbose=options.verbose)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000896
897
def add_collect_options(parser):
  """Adds the options used when waiting for task results."""
  parser.server_group.add_option(
      '-t', '--timeout',
      type='float',
      default=80*60.,
      help='Timeout to wait for result, set to 0 for no timeout; default: '
           '%default s')
  parser.group_logging.add_option(
      '--decorate', action='store_true', help='Decorate output')
  parser.group_logging.add_option(
      '--print-status-updates', action='store_true',
      help='Print periodic status updates')
  output_group = tools.optparse.OptionGroup(parser, 'Task output')
  output_group.add_option(
      '--task-summary-json',
      metavar='FILE',
      help='Dump a summary of task results to this file as json. It contains '
           'only shards statuses as know to server directly. Any output files '
           'emitted by the task can be collected by using --task-output-dir')
  output_group.add_option(
      '--task-output-dir',
      metavar='DIR',
      help='Directory to put task results into. When the task finishes, this '
           'directory contains per-shard directory with output files produced '
           'by shards: <task-output-dir>/<zero-based-shard-index>/.')
  parser.task_output_group = output_group
  parser.add_option_group(output_group)
maruel@chromium.org0437a732013-08-27 16:05:52 +0000924
925
def CMDbots(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  parser.filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  parser.filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  parser.filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  # The two flags are mutually exclusive by construction.
  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  bots = []
  cursor = None
  limit = 250
  # Iterate via cursors: keep requesting pages until the server stops
  # returning a cursor.
  base_url = options.swarming + '/swarming/api/v1/client/bots?limit=%d' % limit
  while True:
    url = base_url
    if cursor:
      url += '&cursor=%s' % urllib.quote(cursor)
    data = net.url_read_json(url)
    if data is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    bots.extend(data['items'])
    cursor = data['cursor']
    if not cursor:
      break

  for bot in natsort.natsorted(bots, key=lambda x: x['id']):
    # Apply the liveness filters first.
    if options.dead_only:
      if not bot['is_dead']:
        continue
    elif not options.keep_dead and bot['is_dead']:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = bot['dimensions']
    for key, value in options.dimensions:
      if key not in dimensions:
        break
      # A bot can have multiple value for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      if isinstance(dimensions[key], list):
        if value not in dimensions[key]:
          break
      else:
        if value != dimensions[key]:
          break
    else:
      # for/else: only reached when no 'break' fired, i.e. the bot matched
      # every requested dimension.
      print bot['id']
      if not options.bare:
        print ' %s' % json.dumps(dimensions, sort_keys=True)
        if bot.get('task_id'):
          print ' task: %s' % bot['task_id']
  return 0
990
991
@subcommand.usage('--json file | task_id...')
def CMDcollect(parser, args):
  """Retrieves results of one or multiple Swarming task by its ID.

  The result can be in multiple part if the execution was sharded. It can
  potentially have retries.
  """
  add_collect_options(parser)
  parser.add_option(
      '-j', '--json',
      help='Load the task ids from .json as saved by trigger --dump-json')
  options, args = parser.parse_args(args)
  # Exactly one of explicit task ids or --json must be used.
  if not args and not options.json:
    parser.error('Must specify at least one task id or --json.')
  if args and options.json:
    parser.error('Only use one of task id or --json.')

  if options.json:
    # Reload the shard -> task id mapping written out by 'trigger --dump-json'.
    with open(options.json) as f:
      data = json.load(f)
    tasks = sorted(data['tasks'].itervalues(), key=lambda t: t['shard_index'])
    args = [t['task_id'] for t in tasks]
  else:
    # Task ids are lowercase hex strings; reject anything else early.
    hex_digits = frozenset('0123456789abcdef')
    if any(set(task_id) - hex_digits for task_id in args):
      parser.error('Task ids are 0-9a-f.')

  try:
    return collect(
        options.swarming,
        None,
        args,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
1032
1033
@subcommand.usage('[resource name]')
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'list' to gather the
  list of valid values from the server.

  Examples:
    Printing the list of known URLs:
      swarming.py query -S https://server-url list

    Listing last 50 tasks on a specific bot named 'swarm1'
      swarming.py query -S https://server-url --limit 50 bot/swarm1/tasks
  """
  # Page size requested from the server on each HTTP request.
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  (options, args) = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify only one resource name.')

  base_url = options.swarming + '/swarming/api/v1/client/' + args[0]
  # The resource name may already embed query parameters (e.g. 'tasks?tag=x');
  # further parameters must then be appended with '&' instead of '?'.
  merge_char = '&' if '?' in base_url else '?'
  url = base_url
  if options.limit:
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    # Fix: use the same merge character as above; always using '?' here built
    # an invalid URL whenever the resource name already contained one.
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    new = net.url_read_json(url)
    if new is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new['items'])
    data['cursor'] = new['cursor']

  # Trim any overshoot from the last page and drop the now-meaningless cursor.
  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
    data.pop('cursor', None)

  json.dump(data, sys.stdout, indent=2, sort_keys=True)
  sys.stdout.write('\n')
  return 0
1089
1090
@subcommand.usage('(hash|isolated) [-- extra_args]')
def CMDrun(parser, args):
  """Triggers a task and wait for the results.

  Basically, does everything to run a command remotely.
  """
  add_trigger_options(parser)
  add_collect_options(parser)
  add_sharding_options(parser)
  options, args = parser.parse_args(args)
  request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(options.swarming, request, options.shards)
  except Failure as e:
    on_error.report(
        'Failed to trigger %s(%s): %s' %
        (options.task_name, args[0], e.args[0]))
    return 1
  if not tasks:
    on_error.report('Failed to trigger the task.')
    return 1
  print('Triggered task: %s' % options.task_name)
  # Wait on the shards in shard_index order.
  by_shard = sorted(tasks.itervalues(), key=lambda t: t['shard_index'])
  task_ids = [shard['task_id'] for shard in by_shard]
  try:
    return collect(
        options.swarming,
        options.task_name,
        task_ids,
        options.timeout,
        options.decorate,
        options.print_status_updates,
        options.task_summary_json,
        options.task_output_dir)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001131
1132
@subcommand.usage('task_id')
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that have been run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  # Fetch the original task request (properties, commands, data bundles).
  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    if data_host != swarming_host:
      # The data lives on another host; ensure credentials exist for it too.
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    # Each data bundle is a zip archive extracted into the work directory.
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  # Only override the process environment if the task specified variables.
  env = None
  if properties['env']:
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  # Run the commands in order; keep the first non-zero exit code as overall
  # result but still run the remaining commands.
  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    if not exit_code:
      exit_code = c
  return exit_code
1188
1189
@subcommand.usage("(hash|isolated) [-- extra_args|raw command]")
def CMDtrigger(parser, args):
  """Triggers a Swarming task.

  Accepts either the hash (sha1) of a .isolated file already uploaded or the
  path to an .isolated file to archive.

  If an .isolated file is specified instead of an hash, it is first archived.

  Passes all extra arguments provided after '--' as additional command line
  arguments for an isolated command specified in *.isolate file.
  """
  add_trigger_options(parser)
  add_sharding_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Dump details about the triggered task(s) to this file as json')
  options, args = parser.parse_args(args)
  request = process_trigger_options(parser, options, args)
  try:
    tasks = trigger_task_shards(options.swarming, request, options.shards)
    if tasks:
      print('Triggered task: %s' % options.task_name)
      shards = sorted(tasks.itervalues(), key=lambda t: t['shard_index'])
      if options.dump_json:
        # Persist enough details for 'collect --json' to find the shards.
        tools.write_json(
            options.dump_json,
            {'base_task_name': options.task_name, 'tasks': tasks},
            True)
        print('To collect results, use:')
        print(' swarming.py collect -S %s --json %s' %
            (options.swarming, options.dump_json))
      else:
        print('To collect results, use:')
        print(' swarming.py collect -S %s %s' %
            (options.swarming, ' '.join(s['task_id'] for s in shards)))
      print('Or visit:')
      for s in shards:
        print(' ' + s['view_url'])
    # 0 when at least one task was triggered, 1 otherwise.
    return int(not tasks)
  except Failure:
    on_error.report(None)
    return 1
maruel@chromium.org0437a732013-08-27 16:05:52 +00001237
1238
class OptionParserSwarming(tools.OptionParserWithLogging):
  """Option parser adding the Swarming server and authentication options."""

  def __init__(self, **kwargs):
    tools.OptionParserWithLogging.__init__(
        self, prog='swarming.py', **kwargs)
    server_group = tools.optparse.OptionGroup(self, 'Server')
    server_group.add_option(
        '-S', '--swarming',
        metavar='URL', default=os.environ.get('SWARMING_SERVER', ''),
        help='Swarming server to use')
    self.server_group = server_group
    self.add_option_group(server_group)
    auth.add_auth_options(self)

  def parse_args(self, *args, **kwargs):
    """Parses arguments, then processes auth and the --swarming option."""
    options, args = tools.OptionParserWithLogging.parse_args(
        self, *args, **kwargs)
    auth.process_auth_options(self, options)
    user = self._process_swarming(options)
    # Commands exposing a --user option default it to the authenticated user.
    if hasattr(options, 'user') and not options.user:
      options.user = user
    return options, args

  def _process_swarming(self, options):
    """Processes the --swarming option and aborts if not specified.

    Returns the identity as determined by the server.
    """
    if not options.swarming:
      self.error('--swarming is required.')
    try:
      options.swarming = net.fix_url(options.swarming)
    except ValueError as e:
      self.error('--swarming %s' % e)
    on_error.report_on_exception_exit(options.swarming)
    try:
      user = auth.ensure_logged_in(options.swarming)
    except ValueError as e:
      self.error(str(e))
    return user
maruel@chromium.org0437a732013-08-27 16:05:52 +00001277
1278
def main(args):
  """Dispatches to the requested subcommand and returns its exit code."""
  parser = OptionParserSwarming(version=__version__)
  return subcommand.CommandDispatcher(__name__).execute(parser, args)
maruel@chromium.org0437a732013-08-27 16:05:52 +00001282
1283
if __name__ == '__main__':
  # Process-wide console setup (encoding, unbuffered output, colorama) before
  # dispatching the command-line arguments to main().
  fix_encoding.fix_encoding()
  tools.disable_buffering()
  colorama.init()
  sys.exit(main(sys.argv[1:]))