mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 1 | # Copyright Martin J. Bligh, Google Inc 2008 |
| 2 | # Released under the GPL v2 |
| 3 | |
| 4 | """ |
| 5 | This class allows you to communicate with the frontend to submit jobs etc |
| 6 | It is designed for writing more sophisticated server-side control files that |
| 7 | can recursively add and manage other jobs. |
| 8 | |
| 9 | We turn the JSON dictionaries into real objects that are more idiomatic |
| 10 | |
| 11 | For docs, see http://autotest//afe/server/noauth/rpc/ |
| 12 | http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api |
| 13 | """ |
| 14 | |
mbligh | 5b61838 | 2008-12-03 15:24:01 +0000 | [diff] [blame] | 15 | import os, time, traceback |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 16 | import common |
| 17 | from autotest_lib.frontend.afe import rpc_client_lib |
| 18 | from autotest_lib.client.common_lib import utils |
| 19 | |
| 20 | |
def dump_object(header, obj):
    """
    Standard way to print out the frontend objects (eg job, host, acl, label)
    in a human-readable fashion for debugging
    """
    lines = [header]
    for key, value in obj.hash.items():
        # 'afe' and 'hash' are plumbing, not frontend data - skip them
        if key in ('afe', 'hash'):
            continue
        lines.append('%20s: %s' % (key, value))
    return '\n'.join(lines) + '\n'
| 32 | |
| 33 | |
| 34 | class afe(object): |
| 35 | """ |
| 36 | AFE class for communicating with the autotest frontend |
| 37 | |
| 38 | All the constructors go in the afe class. |
| 39 | Manipulating methods go in the classes themselves |
| 40 | """ |
| 41 | def __init__(self, user=os.environ.get('LOGNAME'), |
| 42 | web_server='http://autotest', print_log=True, debug=False): |
| 43 | """ |
| 44 | Create a cached instance of a connection to the AFE |
| 45 | |
| 46 | user: username to connect as |
| 47 | web_server: AFE instance to connect to |
| 48 | print_log: pring a logging message to stdout on every operation |
| 49 | debug: print out all RPC traffic |
| 50 | """ |
| 51 | self.user = user |
| 52 | self.print_log = print_log |
| 53 | self.debug = debug |
| 54 | headers = {'AUTHORIZATION' : self.user} |
| 55 | rpc_server = web_server + '/afe/server/noauth/rpc/' |
| 56 | self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers) |
| 57 | |
| 58 | |
| 59 | def run(self, call, **dargs): |
| 60 | """ |
| 61 | Make a RPC call to the AFE server |
| 62 | """ |
| 63 | rpc_call = getattr(self.proxy, call) |
| 64 | if self.debug: |
| 65 | print 'DEBUG: %s %s' % (call, dargs) |
| 66 | return utils.strip_unicode(rpc_call(**dargs)) |
| 67 | |
| 68 | |
| 69 | def log(self, message): |
| 70 | if self.print_log: |
| 71 | print message |
| 72 | |
| 73 | |
| 74 | def host_statuses(self, live=None): |
| 75 | dead_statuses = ['Dead', 'Repair Failed'] |
| 76 | statuses = self.run('get_static_data')['host_statuses'] |
| 77 | if live == True: |
| 78 | return list(set(statuses) - set(['Dead', 'Repair Failed'])) |
| 79 | if live == False: |
| 80 | return dead_statuses |
| 81 | else: |
| 82 | return statuses |
| 83 | |
| 84 | |
| 85 | def get_hosts(self, **dargs): |
| 86 | hosts = self.run('get_hosts', **dargs) |
| 87 | return [host(self, h) for h in hosts] |
| 88 | |
| 89 | |
| 90 | def create_host(self, hostname, **dargs): |
| 91 | id = self.run('add_host', **dargs) |
| 92 | return self.get_hosts(id=id)[0] |
| 93 | |
| 94 | |
| 95 | def get_labels(self, **dargs): |
| 96 | labels = self.run('get_labels', **dargs) |
| 97 | return [label(self, l) for l in labels] |
| 98 | |
| 99 | |
| 100 | def create_label(self, name, **dargs): |
| 101 | id = self.run('add_label', **dargs) |
| 102 | return self.get_labels(id=id)[0] |
| 103 | |
| 104 | |
| 105 | def get_acls(self, **dargs): |
| 106 | acls = self.run('get_acl_groups', **dargs) |
| 107 | return [acl(self, a) for a in acls] |
| 108 | |
| 109 | |
| 110 | def create_acl(self, name, **dargs): |
| 111 | id = self.run('add_acl_group', **dargs) |
| 112 | return self.get_acls(id=id)[0] |
| 113 | |
| 114 | |
| 115 | def get_jobs(self, summary=False, **dargs): |
| 116 | if summary: |
| 117 | jobs_data = self.run('get_jobs_summary', **dargs) |
| 118 | else: |
| 119 | jobs_data = self.run('get_jobs', **dargs) |
| 120 | return [job(self, j) for j in jobs_data] |
| 121 | |
| 122 | |
| 123 | def get_host_queue_entries(self, **data): |
| 124 | entries = self.run('get_host_queue_entries', **data) |
| 125 | return [job_status(self, e) for e in entries] |
| 126 | |
| 127 | |
| 128 | def create_job_by_test(self, tests, kernel=None, **dargs): |
| 129 | """ |
| 130 | Given a test name, fetch the appropriate control file from the server |
| 131 | and submit it |
| 132 | """ |
| 133 | results = self.run('generate_control_file', tests=tests, kernel=kernel, |
| 134 | use_container=False, do_push_packages=True) |
| 135 | if results['is_server']: |
| 136 | dargs['control_type'] = 'Server' |
| 137 | else: |
| 138 | dargs['control_type'] = 'Client' |
| 139 | dargs['dependencies'] = dargs.get('dependencies', []) + \ |
| 140 | results['dependencies'] |
| 141 | dargs['control_file'] = results['control_file'] |
| 142 | dargs['synch_count'] = results['synch_count'] |
| 143 | return self.create_job(**dargs) |
| 144 | |
| 145 | |
| 146 | def create_job(self, control_file, name=' ', priority='Medium', |
| 147 | control_type='Client', **dargs): |
| 148 | id = self.run('create_job', name=name, priority=priority, |
| 149 | control_file=control_file, control_type=control_type, **dargs) |
| 150 | return self.get_jobs(id=id)[0] |
| 151 | |
| 152 | |
mbligh | 5b61838 | 2008-12-03 15:24:01 +0000 | [diff] [blame] | 153 | def run_test_suites(self, pairings, kernel, kernel_label, wait=True, |
| 154 | poll_interval=5): |
| 155 | """ |
| 156 | Run a list of test suites on a particular kernel. |
| 157 | |
| 158 | Poll for them to complete, and return whether they worked or not. |
| 159 | |
| 160 | pairings: list of MachineTestPairing objects to invoke |
| 161 | kernel: name of the kernel to run |
| 162 | kernel_label: label of the kernel to run |
| 163 | (<kernel-version> : <config> : <date>) |
| 164 | wait: boolean - wait for the results to come back? |
| 165 | poll_interval: interval between polling for job results (in minutes) |
| 166 | """ |
| 167 | jobs = [] |
| 168 | for pairing in pairings: |
| 169 | jobs.append(self.invoke_test(pairing, kernel, kernel_label)) |
| 170 | if not wait: |
| 171 | return |
| 172 | while True: |
| 173 | time.sleep(60 * poll_interval) |
| 174 | result = self.poll_all_jobs(jobs) |
| 175 | if result is not None: |
| 176 | return result |
| 177 | |
| 178 | |
| 179 | def poll_all_jobs(self, jobs): |
| 180 | """ |
| 181 | Poll all jobs in a list. See whether they are: |
| 182 | a) All complete successfully (return True) |
| 183 | b) One or more has failed (return False) |
| 184 | c) Cannot tell yet (return None) |
| 185 | """ |
| 186 | for job in jobs: |
| 187 | result = self.poll_job_results(job.id, debug=False) |
| 188 | if result == True: |
| 189 | print 'PASSED %s : %s' % (job.id, job.name) |
| 190 | elif result == False: |
| 191 | print 'FAILED %s : %s' % (job.id, job.name) |
| 192 | return False |
| 193 | elif result is None: |
| 194 | print 'PENDING %s : %s' % (job.id, job.name) |
| 195 | return None |
| 196 | return True |
| 197 | |
| 198 | |
| 199 | def invoke_test(self, pairing, kernel, kernel_label, priority='Medium'): |
| 200 | """ |
| 201 | Given a pairing of a control file to a machine label, find all machines |
| 202 | with that label, and submit that control file to them. |
| 203 | |
| 204 | Returns a job object |
| 205 | """ |
| 206 | job_name = '%s : %s' % (pairing.machine_label, kernel_label) |
| 207 | hosts = self.get_hosts(multiple_labels=[pairing.machine_label]) |
| 208 | host_list = [host.hostname for host in hosts] |
| 209 | new_job = self.create_job_by_test(name=job_name, |
| 210 | dependencies=[pairing.machine_label], |
| 211 | tests=[pairing.control_file], |
| 212 | priority=priority, |
| 213 | hosts=host_list, |
| 214 | kernel=kernel) |
| 215 | print 'Invoked test %s : %s' % (new_job.id, job_name) |
| 216 | return new_job |
| 217 | |
| 218 | |
| 219 | def poll_job_results(self, job_id, debug=False): |
| 220 | """ |
| 221 | Analyse all job results by platform, return: |
| 222 | |
| 223 | False: if any platform has more than one failure |
| 224 | None: if any platform has more than one machine not yet Good. |
| 225 | True: if all platforms have at least all-but-one machines Good. |
| 226 | """ |
| 227 | try: |
| 228 | job_statuses = self.get_host_queue_entries(job=job_id) |
| 229 | except Exception: |
| 230 | print "Ignoring exception on poll job; RPC interface is flaky" |
| 231 | traceback.print_exc() |
| 232 | return None |
| 233 | |
| 234 | platform_map = {} |
| 235 | for job_status in job_statuses: |
| 236 | hostname = job_status.host.hostname |
| 237 | status = job_status.status |
| 238 | platform = job_status.host.platform |
| 239 | if platform not in platform_map: |
| 240 | platform_map[platform] = {'Total' : [hostname]} |
| 241 | else: |
| 242 | platform_map[platform]['Total'].append(hostname) |
| 243 | new_host_list = platform_map[platform].get(status, []) + [hostname] |
| 244 | platform_map[platform][status] = new_host_list |
| 245 | |
| 246 | good_platforms = [] |
| 247 | bad_platforms = [] |
| 248 | unknown_platforms = [] |
| 249 | for platform in platform_map: |
| 250 | total = len(platform_map[platform]['Total']) |
| 251 | completed = len(platform_map[platform].get('Completed', [])) |
| 252 | failed = len(platform_map[platform].get('Failed', [])) |
| 253 | if failed > 1: |
| 254 | bad_platforms.append(platform) |
| 255 | elif completed + 1 >= total: |
| 256 | # if all or all but one are good, call the job good. |
| 257 | good_platforms.append(platform) |
| 258 | else: |
| 259 | unknown_platforms.append(platform) |
| 260 | detail = [] |
| 261 | for status in platform_map[platform]: |
| 262 | if status == 'Total': |
| 263 | continue |
| 264 | detail.append('%s=%s' % (status,platform_map[platform][status])) |
| 265 | if debug: |
| 266 | print '%20s %d/%d %s' % (platform, completed, total, |
| 267 | ' '.join(detail)) |
| 268 | print |
| 269 | |
| 270 | if len(bad_platforms) > 0: |
| 271 | if debug: |
| 272 | print 'Result bad - platforms: ' + ' '.join(bad_platforms) |
| 273 | return False |
| 274 | if len(unknown_platforms) > 0: |
| 275 | if debug: |
| 276 | platform_list = ' '.join(unknown_platforms) |
| 277 | print 'Result unknown - platforms: ', platform_list |
| 278 | return None |
| 279 | if debug: |
| 280 | platform_list = ' '.join(good_platforms) |
| 281 | print 'Result good - all platforms passed: ', platform_list |
| 282 | return True |
| 283 | |
| 284 | |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 285 | class rpc_object(object): |
| 286 | """ |
| 287 | Generic object used to construct python objects from rpc calls |
| 288 | """ |
| 289 | def __init__(self, afe, hash): |
| 290 | self.afe = afe |
| 291 | self.hash = hash |
| 292 | self.__dict__.update(hash) |
| 293 | |
| 294 | |
| 295 | def __str__(self): |
| 296 | return dump_object(self.__repr__(), self) |
| 297 | |
| 298 | |
class label(rpc_object):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: ' + str(self.name)


    def add_hosts(self, hosts):
        # attach this label to each of the named hosts on the server
        return self.afe.run('label_add_hosts', self.id, hosts)


    def remove_hosts(self, hosts):
        # detach this label from each of the named hosts on the server
        return self.afe.run('label_remove_hosts', self.id, hosts)
| 316 | |
| 317 | |
class acl(rpc_object):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: ' + str(self.name)


    def add_hosts(self, hosts):
        # log the change, then push it to the server
        message = 'Adding hosts %s to ACL %s' % (hosts, self.name)
        self.afe.log(message)
        return self.afe.run('acl_group_add_hosts', self.id, hosts)


    def remove_hosts(self, hosts):
        # log the change, then push it to the server
        message = 'Removing hosts %s from ACL %s' % (hosts, self.name)
        self.afe.log(message)
        return self.afe.run('acl_group_remove_hosts', self.id, hosts)
| 337 | |
| 338 | |
class job(rpc_object):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        return 'JOB: ' + str(self.id)
| 350 | |
| 351 | |
class job_status(rpc_object):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id
    """
    def __init__(self, afe, hash):
        # Fix: delegate the common setup to rpc_object instead of
        # duplicating it (the old code carried a TODO to this effect).
        rpc_object.__init__(self, afe, hash)
        # replace the raw job dict with a full job object ...
        self.job = job(afe, self.job)
        # ... and the raw host dict (when present) with a full host object
        if self.host:
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        return 'JOB STATUS: %s-%s' % (self.job.id, self.host.hostname)
| 371 | |
| 372 | |
| 373 | class host(rpc_object): |
| 374 | """ |
| 375 | AFE host object |
| 376 | |
| 377 | Fields: |
| 378 | status, lock_time, locked_by, locked, hostname, invalid, |
| 379 | synch_id, labels, platform, protection, dirty, id |
| 380 | """ |
| 381 | def __repr__(self): |
| 382 | return 'HOST OBJECT: %s' % self.hostname |
| 383 | |
| 384 | |
| 385 | def show(self): |
| 386 | labels = list(set(self.labels) - set([self.platform])) |
| 387 | print '%-6s %-7s %-7s %-16s %s' % (self.hostname, self.status, |
| 388 | self.locked, self.platform, |
| 389 | ', '.join(labels)) |
| 390 | |
| 391 | |
| 392 | def get_acls(self): |
| 393 | return self.afe.get_acls(hosts__hostname=self.hostname) |
| 394 | |
| 395 | |
| 396 | def add_acl(self, acl_name): |
| 397 | self.afe.log('Adding ACL %s to host %s' % (acl_name, self.hostname)) |
| 398 | return self.afe.run('acl_group_add_hosts', id=acl_name, |
| 399 | hosts=[self.hostname]) |
| 400 | |
| 401 | |
| 402 | def remove_acl(self, acl_name): |
| 403 | self.afe.log('Removing ACL %s from host %s' % (acl_name, self.hostname)) |
| 404 | return self.afe.run('acl_group_remove_hosts', id=acl_name, |
| 405 | hosts=[self.hostname]) |
| 406 | |
| 407 | |
| 408 | def get_labels(self): |
| 409 | return self.afe.get_labels(host__hostname__in=[self.hostname]) |
| 410 | |
| 411 | |
| 412 | def add_labels(self, labels): |
| 413 | self.afe.log('Adding labels %s to host %s' % (labels, self.hostname)) |
| 414 | return self.afe.run('host_add_labels', id=self.id, labels=labels) |
| 415 | |
| 416 | |
| 417 | def remove_labels(self, labels): |
| 418 | self.afe.log('Removing labels %s from host %s' % (labels,self.hostname)) |
| 419 | return self.afe.run('host_remove_labels', id=self.id, labels=labels) |
mbligh | 5b61838 | 2008-12-03 15:24:01 +0000 | [diff] [blame] | 420 | |
| 421 | |
class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file:
    the set of machines carrying that label, and the test to run on them.
    """
    def __init__(self, machine_label, control_file):
        self.machine_label, self.control_file = machine_label, control_file