blob: 2681a3ed7e344bea53e54cc7479e26d1b5427d6a [file] [log] [blame]
cmticee5bc63b2015-05-27 16:59:37 -07001#!/usr/bin/python
2#
# Copyright 2015 Google Inc. All Rights Reserved.
4
5import argparse
6import getpass
7import os
8import sys
9import traceback
10
11from utils import logger
12from utils import machines
13from utils import misc
14
15
class AFELockException(Exception):
  """Common ancestor of every exception defined in this module."""
18
19
class MachineNotPingable(AFELockException):
  """Signals that a machine failed to answer a ping."""
22
23
class MissingHostInfo(AFELockException):
  """Signals that the machine servers hold no information about a machine."""
26
27
class UpdateNonLocalMachine(AFELockException):
  """Signals a request to add or remove a ChromeOS HW Lab machine."""
30
31
class DuplicateAdd(AFELockException):
  """Signals a request to add a machine the server already knows about."""
34
35
class UpdateServerError(AFELockException):
  """Signals a failed attempt to add/remove a machine on the local server."""
38
39
class LockingError(AFELockException):
  """Signals that a server could not lock/unlock a machine as requested."""
42
43
class DontOwnLock(AFELockException):
  """Signals an attempt to unlock a machine that somebody else locked.

  This should not be raised if the user specified '--force'.
  """
47
48
class NoAFEServer(AFELockException):
  """Signals that the autotest server cannot be found or accessed."""
51
52
class AFEAccessError(AFELockException):
  """Signals that the lab server returned no information on a lab machine."""
55
56
class AFELockManager(object):
  """Class for locking/unlocking machines via Autotest Front End servers.

  This class contains methods for checking the locked status of machines
  on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
  has methods for adding/removing machines from the local server, and for
  changing the lock status of machines on either server.  For the ChromeOS
  HW Lab, it only allows access to the toolchain team lab machines, as
  defined in toolchain-utils/crosperf/default_remotes.  By default it will
  look for a local server on chrotomation2.mtv.corp.google.com, but an
  alternative local AFE server can be supplied, if desired.

  !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
  thread/process of a program.  If you launch threads and try to call it
  from a thread, you will get an error.  This has to do with restrictions
  in the Python virtual machine (and signal handling) and cannot be changed.
  """

  LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

  def __init__(self, remotes, force_option, chromeos_root, local_server,
               local=True, log=None):
    """Initializes an AFELockManager object.

    Args:
      remotes: A list of machine names or ip addresses to be managed.  Names
        and ip addresses should be represented as strings.  If the list is
        empty, the lock manager will get all known machines.
      force_option: A Boolean indicating whether or not to force an unlock of
        a machine that was locked by someone else.
      chromeos_root: The ChromeOS chroot to use for the autotest scripts.
      local_server: A string containing the name or ip address of the machine
        that is running an AFE server, which is to be used for managing
        machines that are not in the ChromeOS HW lab.
      local: A Boolean indicating whether or not to use/allow a local AFE
        server to be used (see local_server argument).
      log: If not None, this is the logger object to be used for writing out
        informational output messages.  It is expected to be an instance of
        Logger class from utils/logger.py.

    Raises:
      MachineNotPingable: The local AFE server does not respond to ping.
    """
    self.chromeos_root = chromeos_root
    self.user = getpass.getuser()
    self.logger = log or logger.GetLogger()
    autotest_path = os.path.join(chromeos_root,
                                 'src/third_party/autotest/files')

    # The autotest imports below only resolve once these directories are on
    # sys.path.  Skip entries that are already present so that creating
    # several managers does not keep growing sys.path.
    for path in (chromeos_root, autotest_path,
                 os.path.join(autotest_path, 'server', 'cros')):
      if path not in sys.path:
        sys.path.append(path)

    # We have to wait to do these imports until the paths above have
    # been fixed.
    from client import setup_modules
    setup_modules.setup(base_path=autotest_path,
                        root_module_name='autotest_lib')

    from dynamic_suite import frontend_wrappers

    self.afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                             delay_sec=10,
                                             debug=False,
                                             server='cautotest')
    self.local_afe = None
    if local:
      server = local_server or AFELockManager.LOCAL_SERVER
      # Make sure local server is pingable.
      error_msg = ('Local autotest server machine %s not responding to ping.'
                   % server)
      self.CheckMachine(server, error_msg)
      self.local_afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False,
                                                     server=server)
    self.local = local
    # De-duplicate the requested machines; tolerate None as "no machines".
    self.machines = list(set(remotes or []))
    self.force = force_option
    self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
    if not self.machines:
      self.machines = self.toolchain_lab_machines + self.GetAllNonlabMachines()

  def CheckMachine(self, machine, error_msg):
    """Verifies that machine is responding to ping.

    The machine is tried under the given name first and, if that fails,
    under the name with '.cros' appended.

    Args:
      machine: String containing the name or ip address of machine to check.
      error_msg: Message to print if ping fails.

    Raises:
      MachineNotPingable: If machine is not responding to 'ping'
    """
    for name in (machine, machine + '.cros'):
      if machines.MachineIsPingable(name, logging_level='none'):
        return
    raise MachineNotPingable(error_msg)

  def MachineIsKnown(self, machine):
    """Checks to see if either AFE server knows the given machine.

    Args:
      machine: String containing name or ip address of machine to check.

    Returns:
      Boolean indicating if the machine is in the list of known machines for
      either AFE server.
    """
    if machine in self.toolchain_lab_machines:
      return True
    return bool(self.local_afe) and machine in self.GetAllNonlabMachines()

  @staticmethod
  def _ParseDefaultRemotes(lines):
    """Extracts machine names from the lines of a default_remotes file.

    Each meaningful line has the form '<board>: <machine> <machine> ...'.
    Blank lines, lines starting with '#' and lines without a colon are
    skipped instead of aborting the whole parse.

    Args:
      lines: An iterable of strings, one per line of the file.

    Returns:
      A list of machine names, in file order.
    """
    machine_list = []
    for line in lines:
      line = line.strip()
      if not line or line.startswith('#'):
        continue
      _, colon, remotes = line.partition(':')
      if not colon:
        continue
      machine_list.extend(remotes.split())
    return machine_list

  def GetAllToolchainLabMachines(self):
    """Gets a list of all the toolchain machines in the ChromeOS HW lab.

    The list is read from crosperf/default_remotes, located relative to the
    directory containing this file.

    Returns:
      A list of names of the toolchain machines in the ChromeOS HW lab.
    """
    machines_file = os.path.join(os.path.dirname(__file__),
                                 'crosperf', 'default_remotes')
    with open(machines_file, 'r') as input_file:
      return self._ParseDefaultRemotes(input_file)

  def GetAllNonlabMachines(self):
    """Gets a list of all known machines on the local AFE server.

    Returns:
      A list of the names of the machines on the local AFE server; empty if
      there is no local AFE server.
    """
    if not self.local_afe:
      return []
    # list() guarantees the result can be concatenated with other lists.
    return list(self.local_afe.get_hostnames())

  def PrintStatusHeader(self, is_lab_machine):
    """Prints the status header lines for machines.

    Args:
      is_lab_machine: Boolean indicating whether to print HW Lab header or
        local machine header (different spacing).
    """
    # Single-argument print(...) behaves identically under Python 2 and 3.
    if is_lab_machine:
      print('\nMachine (Board)\t\t\t\t\tStatus')
      print('---------------\t\t\t\t\t------\n')
    else:
      print('\nMachine (Board)\t\tStatus')
      print('---------------\t\t------\n')

  def RemoveLocalMachine(self, m):
    """Removes a machine from the local AFE server.

    Does nothing when there is no local AFE server.

    Args:
      m: The machine to remove.

    Raises:
      MissingHostInfo: Can't find machine to be removed.
    """
    if not self.local_afe:
      return
    host_info = self.local_afe.get_hosts(hostname=m)
    if not host_info:
      raise MissingHostInfo('Cannot find/delete machine %s.' % m)
    host_info[0].delete()

  def AddLocalMachine(self, m):
    """Adds a machine to the local AFE server.

    Does nothing when there is no local AFE server.

    Args:
      m: The machine to be added.

    Raises:
      MachineNotPingable: The machine does not respond to ping.
    """
    if not self.local_afe:
      return
    error_msg = 'Machine %s is not responding to ping.' % m
    self.CheckMachine(m, error_msg)
    self.local_afe.create_host(m)

  def AddMachinesToLocalServer(self):
    """Adds one or more machines to the local AFE server.

    Verify that the requested machines are legal to add to the local server,
    i.e. that they are not ChromeOS HW lab machines, and they are not already
    on the local server.  Call AddLocalMachine for each valid machine.

    Raises:
      NoAFEServer: There is no local AFE server to add machines to.
      DuplicateAdd: Attempt to add a machine that is already on the server.
      UpdateNonLocalMachine: Attempt to add a ChromeOS HW lab machine.
      UpdateServerError: Something went wrong while attempting to add a
        machine.
    """
    if not self.local_afe:
      # Without this check the get_hosts call below fails with an opaque
      # AttributeError on None.
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine('Machine %s is already in the ChromeOS '
                                      'HW Lab. Cannot add it to local server.'
                                      % cros_name)
      host_info = self.local_afe.get_hosts(hostname=m)
      if host_info:
        raise DuplicateAdd('Machine %s is already on the local server.' % m)
      try:
        self.AddLocalMachine(m)
        self.logger.LogOutput('Successfully added %s to local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to add %s. %s'
                                % (m, str(e)))

  def RemoveMachinesFromLocalServer(self):
    """Removes one or more machines from the local AFE server.

    Verify that the requested machines are legal to remove from the local
    server, i.e. that they are not ChromeOS HW lab machines.  Call
    RemoveLocalMachine for each valid machine.

    Raises:
      NoAFEServer: There is no local AFE server to remove machines from.
      UpdateNonLocalMachine: Attempt to remove a ChromeOS HW lab machine.
      UpdateServerError: Something went wrong while attempting to remove a
        machine.
    """
    if not self.local_afe:
      # RemoveLocalMachine is a no-op without a local server; fail loudly
      # rather than report a removal that never happened.
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    for m in self.machines:
      for cros_name in [m, m + '.cros']:
        if cros_name in self.toolchain_lab_machines:
          raise UpdateNonLocalMachine('Machine %s is in the ChromeOS HW Lab. '
                                      'This script cannot remove lab machines.'
                                      % cros_name)
      try:
        self.RemoveLocalMachine(m)
        self.logger.LogOutput('Successfully removed %s from local server.' % m)
      except Exception as e:
        traceback.print_exc()
        raise UpdateServerError('Error occurred while attempting to remove %s '
                                '(%s).' % (m, str(e)))

  @staticmethod
  def _FormatMachineState(name, state):
    """Builds the one-line status text printed for a single machine.

    Args:
      name: The machine name to show.
      state: The state dictionary for the machine, with 'locked' and 'board'
        keys, plus 'locked_by' and 'lock_time' when the machine is locked.

    Returns:
      The formatted status line.
    """
    if state['locked']:
      return ('%s (%s)\tlocked by %s since %s' %
              (name, state['board'], state['locked_by'], state['lock_time']))
    return '%s (%s)\tunlocked' % (name, state['board'])

  def ListMachineStates(self, machine_states):
    """Gets and prints the current status for a list of machines.

    Prints out the current status for all of the machines in the current
    AFELockManager's list of machines (set when the object is initialized).
    HW Lab machines are printed first, followed by local machines under a
    separate header.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
    """
    local_machines = []
    printed_hdr = False
    for m in machine_states:
      cros_name = m + '.cros'
      if m in self.toolchain_lab_machines:
        name = m
      elif cros_name in self.toolchain_lab_machines:
        name = cros_name
      else:
        local_machines.append(m)
        continue

      if not printed_hdr:
        self.PrintStatusHeader(True)
        printed_hdr = True
      print(self._FormatMachineState(name, machine_states[m]))

    if local_machines:
      self.PrintStatusHeader(False)
      for m in local_machines:
        print(self._FormatMachineState(m, machine_states[m]))

  def UpdateLockInAFE(self, should_lock_machine, machine):
    """Calls an AFE server to lock/unlock a machine.

    Lab machines are updated on the HW Lab server under their short name
    (the part before the first '.'); all other machines are updated on the
    local server.

    Args:
      should_lock_machine: Boolean indicating whether to lock the machine
        (True) or unlock the machine (False).
      machine: The machine to update.

    Raises:
      LockingError: An error occurred while attempting to update the machine
        state.
    """
    action = 'lock' if should_lock_machine else 'unlock'
    kwargs = {
        'locked': should_lock_machine,
        'lock_reason': 'toolchain user request (%s)' % self.user,
    }

    cros_name = machine + '.cros'
    if cros_name in self.toolchain_lab_machines:
      machine = cros_name
    if machine in self.toolchain_lab_machines:
      m = machine.split('.')[0]
      afe_server = self.afe
    else:
      m = machine
      afe_server = self.local_afe

    try:
      afe_server.run('modify_hosts',
                     host_filter_data={'hostname__in': [m]},
                     update_data=kwargs)
    except Exception as e:
      traceback.print_exc()
      raise LockingError('Unable to %s machine %s. %s' % (action, m, str(e)))

  def UpdateMachines(self, lock_machines):
    """Sets the locked state of the machines to the requested value.

    The machines updated are the ones in self.machines (specified when the
    class object was initialized).

    Args:
      lock_machines: Boolean indicating whether to lock the machines (True) or
        unlock the machines (False).

    Returns:
      A list of the machines whose state was successfully updated.

    Raises:
      LockingError: Propagated from UpdateLockInAFE if an update fails.
    """
    updated_machines = []
    for m in self.machines:
      self.UpdateLockInAFE(lock_machines, m)

      # Since we returned from self.UpdateLockInAFE we assume the request
      # succeeded.
      if lock_machines:
        self.logger.LogOutput('Locked machine(s) %s.' % m)
      else:
        self.logger.LogOutput('Unlocked machine(s) %s.' % m)
      updated_machines.append(m)

    return updated_machines

  def _InternalRemoveMachine(self, machine):
    """Remove machine from internal list of machines.

    Args:
      machine: Name of machine to be removed from internal list.
    """
    # Check to see if machine is lab machine and if so, make sure it has
    # ".cros" on the end.  Substring tests are used rather than find() > 0,
    # which wrongly rejected names that begin with 'row' or 'rack'.
    cros_machine = machine
    looks_like_lab_machine = 'rack' in machine and 'row' in machine
    if looks_like_lab_machine and '.cros' not in machine:
      cros_machine = machine + '.cros'

    self.machines = [m for m in self.machines
                     if m not in (machine, cros_machine)]

  def CheckMachineLocks(self, machine_states, cmd):
    """Check that every machine in requested list is in the proper state.

    If the cmd is 'unlock' verify that every machine is locked by requestor.
    If the cmd is 'lock' verify that every machine is currently unlocked.
    Machines already in the requested state are dropped from self.machines
    with a warning.

    Args:
      machine_states: A dictionary of the current state of every machine in
        the current AFELockManager's list of machines.  Normally obtained by
        calling AFELockManager::GetMachineStates.
      cmd: 'lock' or 'unlock'.  The user-requested action for the machines.

    Raises:
      DontOwnLock: The lock on a requested machine is owned by someone else.
    """
    # items() (rather than iteritems()) works on both Python 2 and 3.
    for k, state in machine_states.items():
      if cmd == 'unlock':
        if not state['locked']:
          self.logger.LogWarning('Attempt to unlock already unlocked machine '
                                 '(%s).' % k)
          self._InternalRemoveMachine(k)
        elif state['locked_by'] != self.user:
          raise DontOwnLock('Attempt to unlock machine (%s) locked by someone '
                            'else (%s).' % (k, state['locked_by']))
      elif cmd == 'lock':
        if state['locked']:
          self.logger.LogWarning('Attempt to lock already locked machine (%s)'
                                 % k)
          self._InternalRemoveMachine(k)

  def HasAFEServer(self, local):
    """Verifies that the AFELockManager has appropriate AFE server.

    Args:
      local: Boolean indicating whether we are checking for the local server
        (True) or for the global server (False).

    Returns:
      A boolean indicating if the AFELockManager has the requested AFE server.
    """
    server = self.local_afe if local else self.afe
    return server is not None

  def GetMachineStates(self, cmd=''):
    """Gets the current state of all the requested machines.

    Gets the current state of all the requested machines, both from the HW lab
    server and from the local server.  Stores the data in a dictionary keyed
    by machine name (the hostname reported by the server, or the requested
    name when the server has no record of the machine).

    Args:
      cmd: The command for which we are getting the machine states.  This is
        important because if one of the requested machines is missing we raise
        an exception, unless the requested command is 'add'.

    Returns:
      A dictionary of machine states for all the machines in the
      AFELockManager object.

    Raises:
      NoAFEServer: Cannot find the HW Lab or local AFE server.
      AFEAccessError: An error occurred when querying the server about a
        machine.
    """
    if not self.HasAFEServer(False):
      raise NoAFEServer('Error: Cannot connect to main AFE server.')

    if self.local and not self.HasAFEServer(True):
      raise NoAFEServer('Error: Cannot connect to local AFE server.')

    # Named machine_states so the imported 'machines' module is not shadowed.
    machine_states = {}
    for m in self.machines:
      cros_name = m + '.cros'
      if (m in self.toolchain_lab_machines or
          cros_name in self.toolchain_lab_machines):
        mod_host = m.split('.')[0]
        host_info = self.afe.get_hosts(hostname=mod_host)
        if not host_info:
          raise AFEAccessError('Unable to get information about %s from main'
                               ' autotest server.' % m)
      else:
        if not self.local_afe:
          # A non-lab machine was requested but the local server is disabled;
          # report that instead of failing with AttributeError on None.
          raise NoAFEServer('Error: Cannot connect to local AFE server.')
        host_info = self.local_afe.get_hosts(hostname=m)
        if not host_info and cmd != 'add':
          raise AFEAccessError('Unable to get information about %s from '
                               'local autotest server.' % m)

      if not host_info:
        machine_states[m] = {}
        continue

      host_info = host_info[0]
      locked = host_info.locked
      machine_states[host_info.hostname] = {
          'board': host_info.platform if host_info.platform else '??',
          'locked': locked,
          'locked_by': host_info.locked_by if locked else '',
          'lock_time': host_info.lock_time if locked else '',
      }
    return machine_states
520
521
def Main(argv):
  """Parses the options, initializes lock manager and dispatches the command.

  Exactly one operation flag (--list, --status, --lock, --unlock,
  --add_machine, --remove_machine) selects what is done.  Every operation
  except status requires --remote.

  Args:
    argv: The options with which this script was invoked.

  Returns:
    0 unless an exception is raised.  Invalid options end the program through
    the argument parser's error handling.
  """
  parser = argparse.ArgumentParser()

  parser.add_argument('--list', dest='cmd', action='store_const',
                      const='status',
                      help='List current status of all known machines.')
  parser.add_argument('--lock', dest='cmd', action='store_const',
                      const='lock', help='Lock given machine(s).')
  parser.add_argument('--unlock', dest='cmd', action='store_const',
                      const='unlock', help='Unlock given machine(s).')
  parser.add_argument('--status', dest='cmd', action='store_const',
                      const='status',
                      help='List current status of given machine(s).')
  parser.add_argument('--add_machine', dest='cmd', action='store_const',
                      const='add',
                      help='Add machine to local machine server.')
  parser.add_argument('--remove_machine', dest='cmd',
                      action='store_const', const='remove',
                      help='Remove machine from the local machine server.')
  parser.add_argument('--nolocal', dest='local',
                      action='store_false', default=True,
                      help='Do not try to use local machine server.')
  parser.add_argument('--remote', dest='remote',
                      help='machines on which to operate')
  parser.add_argument('--chromeos_root', dest='chromeos_root', required=True,
                      help='ChromeOS root to use for autotest scripts.')
  parser.add_argument('--local_server', dest='local_server', default=None,
                      help='Alternate local autotest server to use.')
  parser.add_argument('--force', dest='force', action='store_true',
                      default=False,
                      help='Force lock/unlock of machines, even if not'
                      ' current lock owner.')

  options = parser.parse_args(argv)
  cmd = options.cmd

  # Check for a missing operation first: otherwise a command line with
  # neither an operation nor --remote is reported as "No machines specified",
  # which hides the real problem.
  if not cmd:
    parser.error('No operation selected (--list, --status, --lock, --unlock,'
                 ' --add_machine, --remove_machine).')

  if not options.remote and cmd != 'status':
    parser.error('No machines specified for operation.')

  if not os.path.isdir(options.chromeos_root):
    parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

  # --remote holds whitespace-separated machine names; an empty list makes
  # the lock manager operate on all known machines.
  machine_list = options.remote.split() if options.remote else []

  lock_manager = AFELockManager(machine_list, options.force,
                                options.chromeos_root, options.local_server,
                                options.local)

  machine_states = lock_manager.GetMachineStates(cmd=cmd)

  if cmd == 'status':
    lock_manager.ListMachineStates(machine_states)

  elif cmd in ('lock', 'unlock'):
    # With --force the current lock owner is not checked.
    if not lock_manager.force:
      lock_manager.CheckMachineLocks(machine_states, cmd)
    lock_manager.UpdateMachines(cmd == 'lock')

  elif cmd == 'add':
    lock_manager.AddMachinesToLocalServer()

  elif cmd == 'remove':
    lock_manager.RemoveMachinesFromLocalServer()

  return 0
607
608
# Script entry point: pass the command-line arguments (without the program
# name) to Main and use its return value as the process exit status.
if __name__ == '__main__':
  sys.exit(Main(sys.argv[1:]))