# Copyright Martin J. Bligh, Google Inc 2008
# Released under the GPL v2

"""
This module allows you to communicate with the frontend to submit jobs, etc.
It is designed for writing more sophisticated server-side control files that
can recursively add and manage other jobs.

We turn the JSON dictionaries into real objects that are more idiomatic.

For docs, see http://autotest/afe/server/noauth/rpc/
http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api
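
Example usage (a minimal sketch; it assumes this module is imported as
autotest_lib.server.frontend, and the host and test names are hypothetical):

    from autotest_lib.server import frontend
    afe_obj = frontend.afe(web_server='http://autotest')
    job = afe_obj.create_job_by_test(name='sleeptest smoke',
                                     tests=['sleeptest'],
                                     hosts=['test-host-1'])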
"""

import os, time, traceback
import common
from autotest_lib.frontend.afe import rpc_client_lib
from autotest_lib.client.common_lib import utils


def dump_object(header, obj):
    """
    Standard way to print out the frontend objects (e.g. job, host, acl,
    label) in a human-readable fashion for debugging.
    """
    result = header + '\n'
    for key in obj.hash:
        if key == 'afe' or key == 'hash':
            continue
        result += '%20s: %s\n' % (key, obj.hash[key])
    return result


class afe(object):
    """
    AFE class for communicating with the autotest frontend.

    All the constructors go in the afe class.
    Manipulating methods go in the classes themselves.
    """
    def __init__(self, user=os.environ.get('LOGNAME'),
                 web_server='http://autotest', print_log=True, debug=False):
        """
        Create a cached instance of a connection to the AFE.

        user: username to connect as
        web_server: AFE instance to connect to
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
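
        A minimal construction example (the username and server shown are
        illustrative; they default to $LOGNAME and http://autotest):

            afe_obj = afe(user='autotest-user', web_server='http://autotest',
                          debug=True)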
        """
        self.user = user
        self.print_log = print_log
        self.debug = debug
        headers = {'AUTHORIZATION' : self.user}
        rpc_server = web_server + '/afe/server/noauth/rpc/'
        self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers)


    def run(self, call, **dargs):
        """
        Make an RPC call to the AFE server.
        """
        rpc_call = getattr(self.proxy, call)
        if self.debug:
            print 'DEBUG: %s %s' % (call, dargs)
        return utils.strip_unicode(rpc_call(**dargs))


    def log(self, message):
        if self.print_log:
            print message


    def host_statuses(self, live=None):
        dead_statuses = ['Dead', 'Repair Failed']
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        else:
            return statuses


    def get_hosts(self, **dargs):
        hosts = self.run('get_hosts', **dargs)
        return [host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=id)[0]


    def get_labels(self, **dargs):
        labels = self.run('get_labels', **dargs)
        return [label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=id)[0]


    def get_acls(self, **dargs):
        acls = self.run('get_acl_groups', **dargs)
        return [acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=id)[0]


    def get_jobs(self, summary=False, **dargs):
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        entries = self.run('get_host_queue_entries', **data)
        return [job_status(self, e) for e in entries]


    def create_job_by_test(self, tests, kernel=None, **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it.
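
        Example (a sketch; assumes afe_obj is an afe instance, and the test,
        host and kernel names are hypothetical):
            job = afe_obj.create_job_by_test(name='sleeptest smoke',
                                             tests=['sleeptest'],
                                             hosts=['host1'],
                                             kernel='2.6.27-rc1')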
        """
        results = self.run('generate_control_file', tests=tests, kernel=kernel,
                           use_container=False, do_push_packages=True)
        if results['is_server']:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = dargs.get('dependencies', []) + \
                                results['dependencies']
        dargs['control_file'] = results['control_file']
        dargs['synch_count'] = results['synch_count']
        return self.create_job(**dargs)


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        id = self.run('create_job', name=name, priority=priority,
                      control_file=control_file, control_type=control_type,
                      **dargs)
        return self.get_jobs(id=id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, wait=True,
                        poll_interval=5, email_from=None, email_to=None):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                      (<kernel-version> : <config> : <date>)
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
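
        Example (a sketch; assumes afe_obj is an afe instance, and the machine
        labels, control files and kernel names are hypothetical):
            pairings = [MachineTestPairing('netbook', 'sleeptest'),
                        MachineTestPairing('bigiron', 'dbench')]
            passed = afe_obj.run_test_suites(pairings, '2.6.27',
                                             '2.6.27 : smp : 2008-12-09')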
        """
        jobs = []
        for pairing in pairings:
            job = self.invoke_test(pairing, kernel, kernel_label)
            job.notified = False
            jobs.append(job)
            if email_from and email_to:
                subject = 'Testing started: %s : %s' % (job.name, job.id)
                utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print; if email data
        is provided, an email will be sent as well.

        job: job object to notify about
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s' % (platform, status, hosts))
        text.append("\nhttp://autotest/tko/compose_query.cgi?columns=test&rows=machine_group&condition=tag~'%s-%%25'&title=Report" % job.id)
        body = "\n".join(text)
        print "---------------------------------------------------"
        print "Subject: ", subject
        print body
        print "---------------------------------------------------"
        if email_from and email_to:
            print "Sending email ..."
            utils.send_email(email_from, email_to, subject, body)
        print


    def poll_all_jobs(self, jobs, email_from, email_to):
        """
        Poll all jobs in a list.

        jobs: list of job objects to poll
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here

        Returns one of:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(job, debug=False)
            results.append(job.result)
            if job.result is not None and not job.notified:
                self.result_notify(job, email_from, email_to)
                job.notified = True

            if job.result is None:
                print 'PENDING',
            elif job.result == True:
                print 'PASSED',
            elif job.result == False:
                print 'FAILED',
            print ' %s : %s' % (job.id, job.name)

        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Returns a job object.
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        new_job = self.create_job_by_test(name=job_name,
                                          dependencies=[pairing.machine_label],
                                          tests=[pairing.control_file],
                                          priority=priority,
                                          hosts=host_list,
                                          kernel=kernel)
        print 'Invoked test %s : %s' % (new_job.id, job_name)
        return new_job


    def poll_job_results(self, job, debug=False):
        """
        Analyse all job results by platform, and return:

        False: if any platform has more than one failure
        None: if any platform has more than one machine not yet Good
        True: if all platforms have at least all-but-one machines Good
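
        Worked example (hypothetical hosts): for a platform whose entries are
            {'Total': ['h1', 'h2', 'h3'], 'Completed': ['h1', 'h2'],
             'Failed': ['h3']}
        failed is 1 (not more than one) and completed + 1 >= total, so that
        platform still counts as good.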
        """
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print "Ignoring exception on poll job; RPC interface is flaky"
            traceback.print_exc()
            return None

        platform_map = {}
        for job_status in job_statuses:
            hostname = job_status.host.hostname
            status = job_status.status
            platform = job_status.host.platform
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif completed + 1 >= total:
                # if all or all but one are good, call the job good.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status,platform_map[platform][status]))
            if debug:
                print '%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail))
                print

        if len(bad_platforms) > 0:
            if debug:
                print 'Result bad - platforms: ' + ' '.join(bad_platforms)
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print 'Result unknown - platforms: ', platform_list
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print 'Result good - all platforms passed: ', platform_list
        return True


class rpc_object(object):
    """
    Generic object used to construct python objects from rpc calls
    """
    def __init__(self, afe, hash):
        self.afe = afe
        self.hash = hash
        self.__dict__.update(hash)


    def __str__(self):
        return dump_object(self.__repr__(), self)


class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)


class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)


class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        return 'JOB: %s' % self.id


class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        super(job_status, self).__init__(afe, hash)
        self.job = job(afe, self.job)
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        return 'JOB STATUS: %s-%s' % (self.job.id, self.host.hostname)


class host(rpc_object):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        return 'HOST OBJECT: %s' % self.hostname


    def show(self):
        labels = list(set(self.labels) - set([self.platform]))
        print '%-6s %-7s %-7s %-16s %s' % (self.hostname, self.status,
                                           self.locked, self.platform,
                                           ', '.join(labels))


    def get_acls(self):
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        self.afe.log('Adding ACL %s to host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_add_hosts', id=acl_name,
                            hosts=[self.hostname])


    def remove_acl(self, acl_name):
        self.afe.log('Removing ACL %s from host %s' % (acl_name, self.hostname))
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=[self.hostname])


    def get_labels(self):
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        self.afe.log('Adding labels %s to host %s' % (labels, self.hostname))
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        self.afe.log('Removing labels %s from host %s' % (labels,self.hostname))
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)


class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file
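
    Example (hypothetical machine label and control file names):
        pairing = MachineTestPairing('netbook', 'sleeptest')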
    """
    def __init__(self, machine_label, control_file):
        self.machine_label = machine_label
        self.control_file = control_file