mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 1 | # Copyright Martin J. Bligh, Google Inc 2008 |
| 2 | # Released under the GPL v2 |
| 3 | |
| 4 | """ |
| 5 | This class allows you to communicate with the frontend to submit jobs etc |
| 6 | It is designed for writing more sophisticated server-side control files that |
| 7 | can recursively add and manage other jobs. |
| 8 | |
| 9 | We turn the JSON dictionaries into real objects that are more idiomatic |
| 10 | |
mbligh | c31e402 | 2008-12-11 19:32:30 +0000 | [diff] [blame] | 11 | For docs, see: |
| 12 | http://autotest/afe/server/noauth/rpc/ |
| 13 | http://autotest/new_tko/server/noauth/rpc/ |
| 14 | http://docs.djangoproject.com/en/dev/ref/models/querysets/#queryset-api |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 15 | """ |
| 16 | |
mbligh | 1f23f36 | 2008-12-22 14:46:12 +0000 | [diff] [blame] | 17 | import os, time, traceback, re |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 18 | import common |
| 19 | from autotest_lib.frontend.afe import rpc_client_lib |
mbligh | 37eceaa | 2008-12-15 22:56:37 +0000 | [diff] [blame] | 20 | from autotest_lib.client.common_lib import global_config |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 21 | from autotest_lib.client.common_lib import utils |
# Prefer the site-specific server utilities when this installation provides
# them and fall back to the stock ones otherwise. Only ImportError is caught
# so that a genuine bug inside site_utils is reported instead of being hidden
# behind the fallback.
try:
    from autotest_lib.server.site_common import site_utils as server_utils
except ImportError:
    from autotest_lib.server import utils as server_utils
form_ntuples_from_machines = server_utils.form_ntuples_from_machines

GLOBAL_CONFIG = global_config.global_config
# Hostname used when the global config has no SERVER/hostname entry.
DEFAULT_SERVER = 'autotest'
| 30 | |
mbligh | 6764715 | 2008-11-19 00:18:14 +0000 | [diff] [blame] | 31 | |
def dump_object(header, obj):
    """
    Render a frontend object (eg job, host, acl, label) as readable text for
    debugging.

    The header goes on the first line, followed by one right-aligned
    'key: value' line per entry of obj.hash. The 'afe' and 'hash' keys are
    left out.
    """
    skipped_keys = ('afe', 'hash')
    lines = [header + '\n']
    for key, value in obj.hash.items():
        if key in skipped_keys:
            continue
        lines.append('%20s: %s\n' % (key, value))
    return ''.join(lines)
| 43 | |
| 44 | |
class RpcClient(object):
    """
    Base class for communicating with an autotest RPC server.

    It builds the proxy for one RPC endpoint and provides the run() and
    log() helpers shared by the AFE and TKO clients.
    """
    def __init__(self, path, user, web_server, print_log, debug):
        """
        Create a connection to the RPC interface at web_server + path

        path: URL path of the RPC interface on the web server
        user: username to connect as; $LOGNAME is used when this is empty
        web_server: server to connect to. When empty, 'http://' plus
                $AUTOTEST_WEB is used, or failing that 'http://' plus the
                SERVER/hostname value of the global config.
        print_log: print a logging message to stdout on every operation
        debug: print out all RPC traffic
        """
        if not user:
            user = os.environ.get('LOGNAME')
        if not web_server:
            hostname = os.environ.get('AUTOTEST_WEB')
            if hostname is None:
                hostname = GLOBAL_CONFIG.get_config_value(
                        'SERVER', 'hostname', default=DEFAULT_SERVER)
            web_server = 'http://' + hostname
        self.user = user
        self.print_log = print_log
        self.debug = debug
        rpc_server = web_server + path
        headers = {'AUTHORIZATION' : self.user}
        if debug:
            print('SERVER: %s' % rpc_server)
            print('HEADERS: %s' % headers)
        self.proxy = rpc_client_lib.get_proxy(rpc_server, headers=headers)


    def run(self, call, **dargs):
        """
        Make a RPC call to the server

        call: name of the RPC method to invoke on the proxy
        dargs: keyword arguments handed to the RPC method

        Returns the RPC result after passing it through utils.strip_unicode
        """
        method = getattr(self.proxy, call)
        if self.debug:
            print('DEBUG: %s %s' % (call, dargs))
        reply = method(**dargs)
        return utils.strip_unicode(reply)


    def log(self, message):
        """
        Print message to stdout, but only when print_log was requested
        """
        if not self.print_log:
            return
        print(message)
| 93 | |
| 94 | |
class TKO(RpcClient):
    """
    RPC client for the new_tko interface of the web server
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        rpc_path = '/new_tko/server/noauth/rpc/'
        super(TKO, self).__init__(rpc_path, user, web_server, print_log, debug)


    def get_status_counts(self, job, **data):
        """
        Fetch the status counts of a job, grouped by hostname and test name

        job: job id; only entries whose job tag starts with '<job>-' are used
        data: extra filters handed to the 'get_status_counts' RPC

        Returns a list of TestStatus objects, one per group
        """
        tag_prefix = '%s-' % job
        reply = self.run('get_status_counts',
                         group_by=['hostname', 'test_name'],
                         job_tag__startswith=tag_prefix, **data)
        return [TestStatus(self, group) for group in reply['groups']]
mbligh | c31e402 | 2008-12-11 19:32:30 +0000 | [diff] [blame] | 106 | |
| 107 | |
class AFE(RpcClient):
    """
    RPC client for the autotest frontend (AFE)

    The get_* / create_* methods wrap the RPC results in Host, Label, Acl,
    User, Job, JobStatus and ControlFile objects. The remaining methods
    submit test jobs and poll them until a verdict can be given.
    """
    def __init__(self, user=None, web_server=None, print_log=True, debug=False):
        super(AFE, self).__init__('/afe/server/noauth/rpc/', user, web_server,
                                  print_log, debug)


    def host_statuses(self, live=None):
        """
        Return a list of host status names

        live: True => every status the server reports except the dead ones
              False => only the dead statuses ('Dead', 'Repair Failed')
              anything else => every status the server reports
        """
        dead_statuses = ['Dead', 'Repair Failed']
        statuses = self.run('get_static_data')['host_statuses']
        if live == True:
            return list(set(statuses) - set(dead_statuses))
        if live == False:
            return dead_statuses
        return statuses


    def get_hosts(self, **dargs):
        """
        Return Host objects for the results of the 'get_hosts' RPC
        """
        hosts = self.run('get_hosts', **dargs)
        return [Host(self, h) for h in hosts]


    def create_host(self, hostname, **dargs):
        """
        Add a host with the 'add_host' RPC and return it as a Host object
        """
        host_id = self.run('add_host', hostname=hostname, **dargs)
        return self.get_hosts(id=host_id)[0]


    def get_labels(self, **dargs):
        """
        Return Label objects for the results of the 'get_labels' RPC
        """
        labels = self.run('get_labels', **dargs)
        return [Label(self, l) for l in labels]


    def create_label(self, name, **dargs):
        """
        Add a label with the 'add_label' RPC and return it as a Label object
        """
        label_id = self.run('add_label', name=name, **dargs)
        return self.get_labels(id=label_id)[0]


    def get_acls(self, **dargs):
        """
        Return Acl objects for the results of the 'get_acl_groups' RPC
        """
        acls = self.run('get_acl_groups', **dargs)
        return [Acl(self, a) for a in acls]


    def create_acl(self, name, **dargs):
        """
        Add an ACL group with the 'add_acl_group' RPC and return it as an
        Acl object
        """
        acl_id = self.run('add_acl_group', name=name, **dargs)
        return self.get_acls(id=acl_id)[0]


    def get_users(self, **dargs):
        """
        Return User objects for the results of the 'get_users' RPC
        """
        users = self.run('get_users', **dargs)
        return [User(self, u) for u in users]


    def generate_control_file(self, tests, **dargs):
        """
        Ask the server to generate a control file for the given tests and
        return it as a ControlFile object
        """
        ret = self.run('generate_control_file', tests=tests, **dargs)
        return ControlFile(self, ret)


    def get_jobs(self, summary=False, **dargs):
        """
        Return Job objects for the matching jobs

        summary: use the 'get_jobs_summary' RPC instead of 'get_jobs'
        """
        if summary:
            jobs_data = self.run('get_jobs_summary', **dargs)
        else:
            jobs_data = self.run('get_jobs', **dargs)
        return [Job(self, j) for j in jobs_data]


    def get_host_queue_entries(self, **data):
        """
        Return JobStatus objects for the results of the
        'get_host_queue_entries' RPC
        """
        entries = self.run('get_host_queue_entries', **data)
        return [JobStatus(self, e) for e in entries]


    def create_job_by_test(self, tests, hosts, kernel=None, use_container=False,
                           **dargs):
        """
        Given a test name, fetch the appropriate control file from the server
        and submit it.

        When the control file asks for a synch_count above one, the hosts are
        split into groups of that size and one job is created per group.

        Returns a list of job objects
        """
        control_file = self.generate_control_file(tests=tests, kernel=kernel,
                                                  use_container=use_container,
                                                  do_push_packages=True)
        if control_file.is_server:
            dargs['control_type'] = 'Server'
        else:
            dargs['control_type'] = 'Client'
        dargs['dependencies'] = dargs.get('dependencies', []) + \
                                control_file.dependencies
        dargs['control_file'] = control_file.control_file
        dargs['synch_count'] = control_file.synch_count
        jobs = []
        if control_file.synch_count > 1:
            # We don't trust the scheduler to do the groupings for us.
            synch_count = control_file.synch_count
            # NOTE(review): the second value is presumably the machines that
            # could not be placed in a full group; it is not used here.
            (pairs, _failures) = form_ntuples_from_machines(hosts, synch_count)
            for machines in pairs:
                jobs.append(self.create_job(hosts=machines, **dargs))
        else:
            jobs.append(self.create_job(hosts=hosts, **dargs))
        return jobs


    def create_job(self, control_file, name=' ', priority='Medium',
                   control_type='Client', **dargs):
        """
        Submit a job with the 'create_job' RPC and return it as a Job object
        """
        job_id = self.run('create_job', name=name, priority=priority,
                          control_file=control_file,
                          control_type=control_type, **dargs)
        return self.get_jobs(id=job_id)[0]


    def run_test_suites(self, pairings, kernel, kernel_label, priority='Medium',
                        wait=True, poll_interval=5, email_from=None,
                        email_to=None, timeout=168):
        """
        Run a list of test suites on a particular kernel.

        Poll for them to complete, and return whether they worked or not.

        pairings: list of MachineTestPairing objects to invoke
        kernel: name of the kernel to run
        kernel_label: label of the kernel to run
                                    (<kernel-version> : <config> : <date>)
        priority: priority the jobs are submitted with
        wait: boolean - wait for the results to come back?
        poll_interval: interval between polling for job results (in minutes)
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        timeout: forwarded unchanged to the job creation RPC

        Returns None when wait is false, otherwise the first non-None result
        of poll_all_jobs (True or False).
        """
        jobs = []
        for pairing in pairings:
            new_jobs = self.invoke_test(pairing, kernel, kernel_label, priority,
                                        timeout=timeout)
            for job in new_jobs:
                job.notified = False
            jobs += new_jobs
            # disabled - this is just for debugging: mbligh
            # if email_from and email_to:
            #     subject = 'Testing started: %s : %s' % (job.name, job.id)
            #     utils.send_email(email_from, email_to, subject, subject)
        if not wait:
            return
        tko = TKO()
        while True:
            time.sleep(60 * poll_interval)
            result = self.poll_all_jobs(tko, jobs, email_from, email_to)
            if result is not None:
                return result


    def result_notify(self, job, email_from, email_to):
        """
        Notify about the result of a job. Will always print, if email data
        is provided, will send email for it as well.

        job: job object to notify about
        email_from: send notification email upon completion from here
        email_to: send notification email upon completion to here
        """
        if job.result == True:
            subject = 'Testing PASSED: '
        else:
            subject = 'Testing FAILED: '
        subject += '%s : %s\n' % (job.name, job.id)
        text = []
        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                if status == 'Total':
                    continue
                hosts = ','.join(job.results_platform_map[platform][status])
                text.append('%20s %10s %s\n' % (platform, status, hosts))

        tko_base_url = 'http://%s/tko' % GLOBAL_CONFIG.get_config_value(
                'SERVER', 'hostname', default=DEFAULT_SERVER)

        params = ('columns=test',
                  'rows=machine_group',
                  "condition=tag~'%s-%%25'" % job.id,
                  'title=Report')
        query_string = '&'.join(params)
        url = '%s/compose_query.cgi?%s' % (tko_base_url, query_string)
        text.append('\n')
        text.append(url)

        body = '\n'.join(text)
        print('---------------------------------------------------')
        # Two spaces: the literal's own one plus the separator that
        # "print a, b" inserts between its arguments.
        print('Subject:  %s' % subject)
        print(body)
        print('---------------------------------------------------')
        if email_from and email_to:
            print('Sending email ...')
            utils.send_email(email_from, email_to, subject, body)
        print('')


    def print_job_result(self, job):
        """
        Print the result of a single job.
        job: a job object
        """
        if job.result is None:
            state = 'PENDING '
        elif job.result == True:
            state = 'PASSED '
        elif job.result == False:
            state = 'FAILED '
        else:
            state = ''
        print('%s %s : %s' % (state, job.id, job.name))


    def poll_all_jobs(self, tko, jobs, email_from, email_to):
        """
        Poll all jobs in a list.
            tko: TKO client used to fetch the test results
            jobs: list of job objects to poll
            email_from: send notification email upon completion from here
            email_to: send notification email upon completion to here

        Every job gets its result stored in job.result and printed. A job is
        notified about once, as soon as its result is known.

        Returns:
            a) All complete successfully (return True)
            b) One or more has failed (return False)
            c) Cannot tell yet (return None)
        """
        results = []
        for job in jobs:
            job.result = self.poll_job_results(tko, job, debug=False)
            results.append(job.result)
            if job.result is not None and not job.notified:
                self.result_notify(job, email_from, email_to)
                job.notified = True

            self.print_job_result(job)

        if None in results:
            return None
        elif False in results:
            return False
        else:
            return True


    def _included_platform(self, host, platforms):
        """
        See if host's platforms matches any of the patterns in the included
        platforms list.
        """
        if not platforms:
            return True        # No filtering of platforms
        for platform in platforms:
            if re.search(platform, host.platform):
                return True
        return False


    def invoke_test(self, pairing, kernel, kernel_label, priority='Medium',
                    **dargs):
        """
        Given a pairing of a control file to a machine label, find all machines
        with that label, and submit that control file to them.

        Hosts whose platform does not match pairing.platforms, and hosts in
        status 'Repair Failed', are left out.

        Returns a list of job objects
        """
        job_name = '%s : %s' % (pairing.machine_label, kernel_label)
        hosts = self.get_hosts(multiple_labels=[pairing.machine_label])
        platforms = pairing.platforms
        hosts = [h for h in hosts if self._included_platform(h, platforms)]
        host_list = [h.hostname for h in hosts if h.status != 'Repair Failed']
        print('HOSTS: %s' % host_list)
        new_jobs = self.create_job_by_test(name=job_name,
                                     dependencies=[pairing.machine_label],
                                     tests=[pairing.control_file],
                                     priority=priority,
                                     hosts=host_list,
                                     kernel=kernel,
                                     use_container=pairing.container,
                                     **dargs)
        for new_job in new_jobs:
            print('Invoked test %s : %s' % (new_job.id, job_name))
        return new_jobs


    def _job_test_results(self, tko, job):
        """
        Retrieve test results for a job

        Stores a hostname -> TestResults mapping in job.test_status. The
        mapping stays empty when the RPC fails.
        """
        job.test_status = {}
        try:
            test_statuses = tko.get_status_counts(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return

        for test_status in test_statuses:
            # SERVER_JOB is buggy, and often gives false failures. Ignore it.
            if test_status.test_name == 'SERVER_JOB':
                continue
            hostname = test_status.hostname
            if hostname not in job.test_status:
                job.test_status[hostname] = TestResults()
            job.test_status[hostname].add(test_status)


    def _job_results_platform_map(self, job):
        """
        Group the hosts of a job by platform and status

        Stores platform -> {status: [hostnames], 'Total': [hostnames]} in
        job.results_platform_map and hostname -> status in job.job_status.
        Expects job.test_status to be set already. The platform map stays
        empty when the RPC fails.
        """
        job.results_platform_map = {}
        try:
            job_statuses = self.get_host_queue_entries(job=job.id)
        except Exception:
            print("Ignoring exception on poll job; RPC interface is flaky")
            traceback.print_exc()
            return None

        platform_map = {}
        job.job_status = {}
        for job_status in job_statuses:
            hostname = job_status.host.hostname
            job.job_status[hostname] = job_status.status
            status = job_status.status
            if hostname in job.test_status and job.test_status[hostname].fail:
                # Job status doesn't reflect failed tests, override that
                status = 'Failed'
            platform = job_status.host.platform
            if platform not in platform_map:
                platform_map[platform] = {'Total' : [hostname]}
            else:
                platform_map[platform]['Total'].append(hostname)
            new_host_list = platform_map[platform].get(status, []) + [hostname]
            platform_map[platform][status] = new_host_list
        job.results_platform_map = platform_map


    def poll_job_results(self, tko, job, debug=False):
        """
        Analyse all job results by platform, return:

            False: if any platform has more than one failure
            None:  if any platform has more than one machine not yet Good,
                   or if no host status could be retrieved at all.
            True:  if all platforms have at least all-but-one machines Good.
        """
        self._job_test_results(tko, job)
        self._job_results_platform_map(job)

        platform_map = job.results_platform_map
        if not platform_map:
            # Nothing was retrieved (for instance the RPC failed and was
            # ignored), so there is no basis for a verdict. Without this
            # check the empty map would be reported as a pass.
            if debug:
                print('Result unknown - no host status available')
            return None

        good_platforms = []
        bad_platforms = []
        unknown_platforms = []
        for platform in platform_map:
            total = len(platform_map[platform]['Total'])
            completed = len(platform_map[platform].get('Completed', []))
            failed = len(platform_map[platform].get('Failed', []))
            if failed > 1:
                bad_platforms.append(platform)
            elif (completed >= 1) and (completed + 1 >= total):
                # if all or all but one are good, call the job good.
                # At least one machine must really have completed, which
                # also lets a single-machine platform pass.
                good_platforms.append(platform)
            else:
                unknown_platforms.append(platform)
            detail = []
            for status in platform_map[platform]:
                if status == 'Total':
                    continue
                detail.append('%s=%s' % (status,platform_map[platform][status]))
            if debug:
                print('%20s %d/%d %s' % (platform, completed, total,
                                         ' '.join(detail)))
                print('')

        if len(bad_platforms) > 0:
            if debug:
                print('Result bad - platforms: ' + ' '.join(bad_platforms))
            return False
        if len(unknown_platforms) > 0:
            if debug:
                platform_list = ' '.join(unknown_platforms)
                print('Result unknown - platforms:  %s' % platform_list)
            return None
        if debug:
            platform_list = ' '.join(good_platforms)
            print('Result good - all platforms passed:  %s' % platform_list)
        return True
| 482 | |
| 483 | |
class TestResults(object):
    """
    Container class used to hold the results of the tests for a job

    good: names of the tests without failures
    fail: names of the tests with at least one failure
    """
    def __init__(self):
        self.good = []
        self.fail = []


    def add(self, result):
        """
        File the test name of result under good or fail.

        A test counts as failed when more runs completed than passed.
        """
        not_passed = result.complete_count - result.pass_count
        bucket = self.fail if not_passed > 0 else self.good
        bucket.append(result.test_name)
| 498 | |
| 499 | |
class RpcObject(object):
    """
    Generic object used to construct python objects from rpc calls

    Every key of the RPC dictionary becomes an attribute of the object.
    """
    def __init__(self, afe, hash):
        """
        afe: RPC client the object was fetched with
        hash: dictionary returned by the RPC call
        """
        self.afe = afe
        self.hash = hash
        # Applied last, so keys of the dictionary win over the two above.
        vars(self).update(hash)


    def __str__(self):
        header = self.__repr__()
        return dump_object(header, self)
| 512 | |
| 513 | |
class ControlFile(RpcObject):
    """
    AFE control file object

    Fields: synch_count, dependencies, control_file, is_server
    """
    def __repr__(self):
        contents = self.control_file
        return 'CONTROL FILE: %s' % contents
| 522 | |
| 523 | |
class Label(RpcObject):
    """
    AFE label object

    Fields:
        name, invalid, platform, kernel_config, id, only_if_needed
    """
    def __repr__(self):
        return 'LABEL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Add the given hosts to this label; returns the RPC result
        """
        # RpcClient.run() forwards keyword arguments only, so the label id
        # and the hosts have to be passed by name.
        return self.afe.run('label_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Remove the given hosts from this label; returns the RPC result
        """
        return self.afe.run('label_remove_hosts', id=self.id, hosts=hosts)
| 541 | |
| 542 | |
class Acl(RpcObject):
    """
    AFE acl object

    Fields:
        users, hosts, description, name, id
    """
    def __repr__(self):
        return 'ACL: %s' % self.name


    def add_hosts(self, hosts):
        """
        Add the given hosts to this ACL; returns the RPC result
        """
        self.afe.log('Adding hosts %s to ACL %s' % (hosts, self.name))
        # RpcClient.run() forwards keyword arguments only, so the ACL id and
        # the hosts have to be passed by name.
        return self.afe.run('acl_group_add_hosts', id=self.id, hosts=hosts)


    def remove_hosts(self, hosts):
        """
        Remove the given hosts from this ACL; returns the RPC result
        """
        self.afe.log('Removing hosts %s from ACL %s' % (hosts, self.name))
        return self.afe.run('acl_group_remove_hosts', id=self.id, hosts=hosts)


    def add_users(self, users):
        """
        Add the given users to this ACL; returns the RPC result
        """
        self.afe.log('Adding users %s to ACL %s' % (users, self.name))
        # NOTE(review): this call passes the ACL name as id while the host
        # methods pass self.id - confirm the RPC accepts both.
        return self.afe.run('acl_group_add_users', id=self.name, users=users)
| 567 | |
| 568 | |
class Job(RpcObject):
    """
    AFE job object

    Fields:
        name, control_file, control_type, synch_count, reboot_before,
        run_verify, priority, email_list, created_on, dependencies,
        timeout, owner, reboot_after, id
    """
    def __repr__(self):
        job_id = self.id
        return 'JOB: %s' % job_id
| 580 | |
| 581 | |
class JobStatus(RpcObject):
    """
    AFE job_status object

    Fields:
        status, complete, deleted, meta_host, host, active, execution_subdir, id

    The job field is turned into a Job object. The host field is turned
    into a Host object when it is set.
    """
    def __init__(self, afe, hash):
        super(JobStatus, self).__init__(afe, hash)
        self.job = Job(afe, self.job)
        if self.host:
            # Fetch the full host record by hostname and keep that instead
            # of the dictionary that came with the entry.
            self.host = afe.get_hosts(hostname=self.host['hostname'])[0]


    def __repr__(self):
        # The host may be unset (see __init__), so do not dereference it
        # blindly.
        if self.host:
            hostname = self.host.hostname
        else:
            hostname = 'None'
        return 'JOB STATUS: %s-%s' % (self.job.id, hostname)
| 601 | |
| 602 | |
class Host(RpcObject):
    """
    AFE host object

    Fields:
        status, lock_time, locked_by, locked, hostname, invalid,
        synch_id, labels, platform, protection, dirty, id
    """
    def __repr__(self):
        name = self.hostname
        return 'HOST OBJECT: %s' % name


    def show(self):
        """
        Print a one-line summary: hostname, status, locked, platform and the
        labels other than the platform
        """
        other_labels = list(set(self.labels) - set([self.platform]))
        columns = (self.hostname, self.status, self.locked, self.platform,
                   ', '.join(other_labels))
        print('%-6s %-7s %-7s %-16s %s' % columns)


    def delete(self):
        """
        Delete this host with the 'delete_host' RPC
        """
        return self.afe.run('delete_host', id=self.id)


    def modify(self, **dargs):
        """
        Hand the keyword arguments to the 'modify_host' RPC for this host
        """
        return self.afe.run('modify_host', id=self.id, **dargs)


    def get_acls(self):
        """
        Return the Acl objects that contain this host
        """
        return self.afe.get_acls(hosts__hostname=self.hostname)


    def add_acl(self, acl_name):
        """
        Add this host to the ACL called acl_name
        """
        self.afe.log('Adding ACL %s to host %s' % (acl_name, self.hostname))
        members = [self.hostname]
        return self.afe.run('acl_group_add_hosts', id=acl_name, hosts=members)


    def remove_acl(self, acl_name):
        """
        Remove this host from the ACL called acl_name
        """
        self.afe.log('Removing ACL %s from host %s' % (acl_name, self.hostname))
        members = [self.hostname]
        return self.afe.run('acl_group_remove_hosts', id=acl_name,
                            hosts=members)


    def get_labels(self):
        """
        Return the Label objects attached to this host
        """
        return self.afe.get_labels(host__hostname__in=[self.hostname])


    def add_labels(self, labels):
        """
        Attach the given labels to this host
        """
        self.afe.log('Adding labels %s to host %s' % (labels, self.hostname))
        return self.afe.run('host_add_labels', id=self.id, labels=labels)


    def remove_labels(self, labels):
        """
        Detach the given labels from this host
        """
        self.afe.log('Removing labels %s from host %s' % (labels,self.hostname))
        return self.afe.run('host_remove_labels', id=self.id, labels=labels)
mbligh | 5b61838 | 2008-12-03 15:24:01 +0000 | [diff] [blame] | 658 | |
| 659 | |
class User(RpcObject):
    """
    AFE user object, shown by its login field
    """
    def __repr__(self):
        login = self.login
        return 'USER: %s' % login
| 663 | |
| 664 | |
class TestStatus(RpcObject):
    """
    TKO test status object

    Fields:
        test_idx, hostname, test_name, id
        complete_count, incomplete_count, group_count, pass_count
    """
    def __repr__(self):
        status_id = self.id
        return 'TEST STATUS: %s' % status_id
| 675 | |
| 676 | |
class MachineTestPairing(object):
    """
    Object representing the pairing of a machine label with a control file

    machine_label: use machines from this label
    control_file: use this control file (by name in the frontend)
    platforms: list of regexps to filter platforms by. [] => no filtering
    container: handed on as use_container when the job is created
    """
    def __init__(self, machine_label, control_file, platforms=None,
                 container=False):
        # A list literal as default would be one object shared by every
        # pairing, so each instance gets a list of its own instead.
        if platforms is None:
            platforms = []
        self.machine_label = machine_label
        self.control_file = control_file
        self.platforms = platforms
        self.container = container


    def __repr__(self):
        return '%s %s %s %s' % (self.machine_label, self.control_file,
                                self.platforms, self.container)