blob: 128ee8ed00dcbacc368d5370f34254cc597e8d10 [file] [log] [blame]
cmticee5bc63b2015-05-27 16:59:37 -07001#!/usr/bin/python
2#
# Copyright 2015 Google Inc. All Rights Reserved.
4
5import argparse
6import getpass
7import os
8import sys
9import traceback
10
11from utils import logger
12from utils import machines
13from utils import misc
14
15
class AFELockException(Exception):
    """Base class for exceptions in this module."""
18
19
class MachineNotPingable(AFELockException):
    """Raised when machine does not respond to ping."""
22
23
class MissingHostInfo(AFELockException):
    """Raised when cannot find info about machine on machine servers."""
26
27
class UpdateNonLocalMachine(AFELockException):
    """Raised when user requests to add/remove a ChromeOS HW Lab machine."""
30
31
class DuplicateAdd(AFELockException):
    """Raised when user requests to add a machine already on the server."""
34
35
class UpdateServerError(AFELockException):
    """Raised when attempt to add/remove a machine from local server fails."""
38
39
class LockingError(AFELockException):
    """Raised when server fails to lock/unlock machine as requested."""
42
43
class DuplicateLock(AFELockException):
    """Raised when user attempts to lock an already locked machine."""
46
47
class DuplicateUnlock(AFELockException):
    """Raised when user attempts to unlock an already unlocked machine."""
50
51
class DontOwnLock(AFELockException):
    """Raised when user attempts to unlock machine locked by someone else."""
    # This should not be raised if the user specified '--force'.
55
56
class NoAFEServer(AFELockException):
    """Raised when cannot find/access the autotest server."""
59
60
class AFEAccessError(AFELockException):
    """Raised when cannot get information about lab machine from lab server."""
63
64
class AFELockManager(object):
    """Class for locking/unlocking machines via Autotest Front End servers.

    This class contains methods for checking the locked status of machines
    on both the ChromeOS HW Lab AFE server and a local AFE server.  It also
    has methods for adding/removing machines from the local server, and for
    changing the lock status of machines on either server.  For the ChromeOS
    HW Lab, it only allows access to the toolchain team lab machines, as
    defined in toolchain-utils/crosperf/default_remotes.  By default it will
    look for a local server on chrotomation2.mtv.corp.google.com, but an
    alternative local AFE server can be supplied, if desired.

    !!!IMPORTANT NOTE!!!  The AFE server can only be called from the main
    thread/process of a program.  If you launch threads and try to call it
    from a thread, you will get an error.  This has to do with restrictions
    in the Python virtual machine (and signal handling) and cannot be
    changed.
    """

    LOCAL_SERVER = 'chrotomation2.mtv.corp.google.com'

    def __init__(self, remotes, force_option, chromeos_root, local_server,
                 local=True, log=None):
        """Initializes an AFELockManager object.

        Args:
          remotes: A list of machine names or ip addresses to be managed.
            Names and ip addresses should be represented as strings.  If the
            list is empty (or None), the lock manager will get all known
            machines.
          force_option: A Boolean indicating whether or not to force an
            unlock of a machine that was locked by someone else.
          chromeos_root: The ChromeOS chroot to use for the autotest scripts.
          local_server: A string containing the name or ip address of the
            machine that is running an AFE server, which is to be used for
            managing machines that are not in the ChromeOS HW lab.
          local: A Boolean indicating whether or not to use/allow a local AFE
            server to be used (see local_server argument).
          log: If not None, this is the logger object to be used for writing
            out informational output messages.  It is expected to be an
            instance of Logger class from utils/logger.py.

        Raises:
          MachineNotPingable: The local AFE server does not respond to ping.
        """
        self.chromeos_root = chromeos_root
        self.user = getpass.getuser()
        self.logger = log or logger.GetLogger()
        autotest_path = os.path.join(chromeos_root,
                                     'src/third_party/autotest/files')

        # Make the autotest packages importable; avoid piling up duplicate
        # entries when several managers are created in one process.
        for path in (autotest_path,
                     os.path.join(autotest_path, 'server', 'cros')):
            if path not in sys.path:
                sys.path.append(path)

        # We have to wait to do these imports until the paths above have
        # been fixed.
        from client import setup_modules
        setup_modules.setup(base_path=autotest_path,
                            root_module_name='autotest_lib')

        from dynamic_suite import frontend_wrappers

        self.afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                 delay_sec=10,
                                                 debug=False)
        if not local:
            self.local_afe = None
        else:
            server = local_server or AFELockManager.LOCAL_SERVER
            # Make sure local server is pingable.
            error_msg = ('Local autotest server machine %s not responding to '
                         'ping.' % server)
            self.CheckMachine(server, error_msg)
            self.local_afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                           delay_sec=10,
                                                           debug=False,
                                                           server=server)
        self.local = local
        # De-duplicate the requested machines; tolerate remotes=None.
        self.machines = list(set(remotes or []))
        self.force = force_option
        self.toolchain_lab_machines = self.GetAllToolchainLabMachines()
        if not self.machines:
            self.machines = (self.toolchain_lab_machines +
                             self.GetAllNonlabMachines())

    def CheckMachine(self, machine, error_msg):
        """Verifies that machine is responding to ping.

        Args:
          machine: String containing the name or ip address of machine to
            check.
          error_msg: Message to attach to the exception if ping fails.

        Raises:
          MachineNotPingable: If machine is not responding to 'ping'.
        """
        if not machines.MachineIsPingable(machine, logging_level='none'):
            raise MachineNotPingable(error_msg)

    def MachineIsKnown(self, machine):
        """Checks to see if either AFE server knows the given machine.

        Args:
          machine: String containing name or ip address of machine to check.

        Returns:
          Boolean indicating if the machine is in the list of known machines
          for either AFE server.
        """
        if machine in self.toolchain_lab_machines:
            return True
        if self.local_afe and machine in self.GetAllNonlabMachines():
            return True
        return False

    def GetAllToolchainLabMachines(self, machines_file=None):
        """Gets a list of all the toolchain machines in the ChromeOS HW lab.

        Each non-blank line of the remotes file is expected to look like
        '<board>: <machine> <machine> ...'.

        Args:
          machines_file: Optional path of the remotes file to read.  Defaults
            to crosperf/default_remotes next to this script.

        Returns:
          A list of names of the toolchain machines in the ChromeOS HW lab.

        Raises:
          ValueError: A non-blank line does not contain a ':' separator.
        """
        if machines_file is None:
            machines_file = os.path.join(os.path.dirname(__file__),
                                         'crosperf', 'default_remotes')
        machine_list = []
        with open(machines_file, 'r') as input_file:
            for line in input_file:
                line = line.strip()
                if not line:
                    # Blank lines carry no machines; do not choke on them.
                    continue
                # Only the first ':' separates the board from its machines.
                _, remotes = line.split(':', 1)
                machine_list.extend(remotes.split())
        return machine_list

    def GetAllNonlabMachines(self):
        """Gets a list of all known machines on the local AFE server.

        Returns:
          The hostnames reported by the local AFE server, or an empty list
          when no local AFE server is in use.
        """
        if self.local_afe:
            return self.local_afe.get_hostnames()
        return []

    def PrintStatusHeader(self, is_lab_machine):
        """Prints the status header lines for machines.

        Args:
          is_lab_machine: Boolean indicating whether to print HW Lab header
            or local machine header (different spacing).
        """
        if is_lab_machine:
            print('\nMachine (Board)\t\t\t\tStatus')
            print('---------------\t\t\t\t------\n')
        else:
            print('\nMachine (Board)\t\tStatus')
            print('---------------\t\t------\n')

    def RemoveLocalMachine(self, m):
        """Removes a machine from the local AFE server.

        Does nothing when no local AFE server is in use.

        Args:
          m: The machine to remove.

        Raises:
          MissingHostInfo: Can't find machine to be removed.
        """
        if self.local_afe:
            host_info = self.local_afe.get_hosts(hostname=m)
            if not host_info:
                raise MissingHostInfo('Cannot find/delete machine %s.' % m)
            host_info[0].delete()

    def AddLocalMachine(self, m):
        """Adds a machine to the local AFE server.

        Does nothing when no local AFE server is in use.

        Args:
          m: The machine to be added.

        Raises:
          MachineNotPingable: The machine does not respond to ping.
        """
        if self.local_afe:
            error_msg = 'Machine %s is not responding to ping.' % m
            self.CheckMachine(m, error_msg)
            self.local_afe.create_host(m)

    def AddMachinesToLocalServer(self):
        """Adds one or more machines to the local AFE server.

        Verify that the requested machines are legal to add to the local
        server, i.e. that they are not ChromeOS HW lab machines, and they are
        not already on the local server.  Call AddLocalMachine for each valid
        machine.

        Raises:
          NoAFEServer: No local AFE server is in use.
          DuplicateAdd: Attempt to add a machine that is already on the
            server.
          UpdateNonLocalMachine: Attempt to add a ChromeOS HW lab machine.
          UpdateServerError: Something went wrong while attempting to add a
            machine.
        """
        if self.local_afe is None:
            # Without this guard the get_hosts call below would fail with an
            # unhelpful AttributeError on None.
            raise NoAFEServer('Error: Cannot add machines without a local '
                              'AFE server.')
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                raise UpdateNonLocalMachine(
                    'Machine %s is already in the ChromeOS HW '
                    'Lab. Cannot add it to local server.' % m)
            host_info = self.local_afe.get_hosts(hostname=m)
            if host_info:
                raise DuplicateAdd(
                    'Machine %s is already on the local server.' % m)
            try:
                self.AddLocalMachine(m)
                self.logger.LogOutput(
                    'Successfully added %s to local server.' % m)
            except Exception as e:
                traceback.print_exc()
                raise UpdateServerError(
                    'Error occurred while attempting to add %s. %s'
                    % (m, str(e)))

    def RemoveMachinesFromLocalServer(self):
        """Removes one or more machines from the local AFE server.

        Verify that the requested machines are legal to remove from the local
        server, i.e. that they are not ChromeOS HW lab machines.  Call
        RemoveLocalMachine for each valid machine.

        Raises:
          UpdateNonLocalMachine: Attempt to remove a ChromeOS HW lab machine.
          UpdateServerError: Something went wrong while attempting to remove
            a machine.
        """
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                raise UpdateNonLocalMachine(
                    'Machine %s is in the ChromeOS HW Lab. '
                    'This script cannot remove lab machines.'
                    % m)
            try:
                self.RemoveLocalMachine(m)
                self.logger.LogOutput(
                    'Successfully removed %s from local server.' % m)
            except Exception as e:
                traceback.print_exc()
                raise UpdateServerError(
                    'Error occurred while attempting to remove %s '
                    '(%s).' % (m, str(e)))

    def _PrintMachineState(self, name, state):
        """Prints the one-line lock status of a single machine.

        Args:
          name: The machine name to show.
          state: The machine's state dictionary; must contain 'locked' and
            'board', plus 'locked_by' and 'lock_time' when locked.
        """
        if state['locked']:
            print('%s (%s)\tlocked by %s since %s' %
                  (name, state['board'], state['locked_by'],
                   state['lock_time']))
        else:
            print('%s (%s)\tunlocked' % (name, state['board']))

    def ListMachineStates(self, machine_states):
        """Gets and prints the current status for a list of machines.

        Prints out the current status for all of the machines in
        machine_states.  HW Lab machines are printed first, as they are
        encountered; machines on the local server are printed afterwards
        under their own header.

        Args:
          machine_states: A dictionary of the current state of every machine
            in the current AFELockManager's list of machines.  Normally
            obtained by calling AFELockManager::GetMachineStates.
        """
        local_machines = []
        printed_hdr = False
        for m in machine_states:
            # State keys may lack the '.cros' suffix used in the lab list.
            is_lab_machine = (m in self.toolchain_lab_machines or
                              m + '.cros' in self.toolchain_lab_machines)
            if not is_lab_machine:
                local_machines.append(m)
                continue
            if not printed_hdr:
                self.PrintStatusHeader(True)
                printed_hdr = True
            self._PrintMachineState(m, machine_states[m])

        if local_machines:
            self.PrintStatusHeader(False)
            for m in local_machines:
                self._PrintMachineState(m, machine_states[m])

    def UpdateLockInAFE(self, should_lock_machine, machine):
        """Calls an AFE server to lock/unlock a machine.

        Toolchain lab machines are updated on the main AFE server (using the
        part of the name before the first '.'); every other machine is
        updated on the local AFE server.

        Args:
          should_lock_machine: Boolean indicating whether to lock the machine
            (True) or unlock the machine (False).
          machine: The machine to update.

        Raises:
          LockingError: An error occurred while attempting to update the
            machine state.
        """
        action = 'lock' if should_lock_machine else 'unlock'
        kwargs = {'locked': should_lock_machine}

        if machine in self.toolchain_lab_machines:
            m = machine.split('.')[0]
            kwargs['lock_reason'] = ('toolchain user request (%s)' %
                                     self.user)
            afe_server = self.afe
        else:
            m = machine
            afe_server = self.local_afe

        try:
            afe_server.run('modify_hosts',
                           host_filter_data={'hostname__in': [m]},
                           update_data=kwargs)
        except Exception as e:
            traceback.print_exc()
            raise LockingError('Unable to %s machine %s. %s' %
                               (action, m, str(e)))

    def UpdateMachines(self, lock_machines):
        """Sets the locked state of the machines to the requested value.

        The machines updated are the ones in self.machines (specified when
        the class object was initialized).

        Args:
          lock_machines: Boolean indicating whether to lock the machines
            (True) or unlock the machines (False).

        Raises:
          LockingError: Propagated from UpdateLockInAFE.
        """
        for m in self.machines:
            self.UpdateLockInAFE(lock_machines, m)

            # Since we returned from self.UpdateLockInAFE we assume the
            # request succeeded.
            if lock_machines:
                self.logger.LogOutput('Locked machine(s) %s.' % m)
            else:
                self.logger.LogOutput('Unlocked machine(s) %s.' % m)

    def CheckMachineLocks(self, machine_states, cmd):
        """Check that every machine in requested list is in the proper state.

        If the cmd is 'unlock' verify that every machine is locked by
        requestor.  If the cmd is 'lock' verify that every machine is
        currently unlocked.  Any other cmd is accepted without checks.

        Args:
          machine_states: A dictionary of the current state of every machine
            in the current AFELockManager's list of machines.  Normally
            obtained by calling AFELockManager::GetMachineStates.
          cmd: 'lock' or 'unlock'.  The user-requested action for the
            machines.

        Raises:
          DuplicateLock: A machine requested to be locked is already locked.
          DuplicateUnlock: A machine requested to be unlocked is already
            unlocked.
          DontOwnLock: The lock on a requested machine is owned by someone
            else.
        """
        # items() (rather than iteritems()) works on both Python 2 and 3.
        for k, state in machine_states.items():
            if cmd == 'unlock':
                if not state['locked']:
                    raise DuplicateUnlock(
                        'Attempt to unlock already unlocked machine '
                        '(%s).' % k)

                if state['locked_by'] != self.user:
                    raise DontOwnLock(
                        'Attempt to unlock machine (%s) locked by someone '
                        'else (%s).' % (k, state['locked_by']))
            elif cmd == 'lock':
                if state['locked']:
                    raise DuplicateLock(
                        'Attempt to lock already locked machine (%s)' % k)

    def HasAFEServer(self, local):
        """Verifies that the AFELockManager has appropriate AFE server.

        Args:
          local: Boolean indicating whether we are checking for the local
            server (True) or for the global server (False).

        Returns:
          A boolean indicating if the AFELockManager has the requested AFE
          server.
        """
        server = self.local_afe if local else self.afe
        return server is not None

    def GetMachineStates(self, cmd=''):
        """Gets the current state of all the requested machines.

        Gets the current state of all the requested machines, both from the
        HW lab server and from the local server.  Stores the data in a
        dictionary keyed by the hostname reported by the server (or by the
        requested name, mapped to an empty dictionary, when the machine is
        unknown and cmd is 'add').

        Args:
          cmd: The command for which we are getting the machine states.  This
            is important because if one of the requested machines is missing
            we raise an exception, unless the requested command is 'add'.

        Returns:
          A dictionary of machine states for all the machines in the
          AFELockManager object.  Each known machine maps to a dictionary
          with the keys 'board', 'locked', 'locked_by' and 'lock_time'.

        Raises:
          NoAFEServer: Cannot find the HW Lab or local AFE server.
          AFEAccessError: An error occurred when querying the server about a
            machine.
        """
        if not self.HasAFEServer(False):
            raise NoAFEServer('Error: Cannot connect to main AFE server.')

        if self.local and not self.HasAFEServer(True):
            raise NoAFEServer('Error: Cannot connect to local AFE server.')

        states = {}
        for m in self.machines:
            if m in self.toolchain_lab_machines:
                # The main server is queried by the name before the first '.'.
                host_info = self.afe.get_hosts(hostname=m.split('.')[0])
                if not host_info:
                    raise AFEAccessError(
                        'Unable to get information about %s from main'
                        ' autotest server.' % m)
            else:
                if self.local_afe is None:
                    # Local server use was disabled, so there is nowhere to
                    # look up a machine that is not a toolchain lab machine.
                    raise NoAFEServer(
                        'Error: %s is not a toolchain lab machine and no '
                        'local AFE server is in use.' % m)
                host_info = self.local_afe.get_hosts(hostname=m)
                if not host_info and cmd != 'add':
                    raise AFEAccessError(
                        'Unable to get information about %s from '
                        'local autotest server.' % m)

            if not host_info:
                # Only reachable for 'add': the machine is not known yet.
                states[m] = {}
                continue

            host = host_info[0]
            states[host.hostname] = {
                'board': host.platform if host.platform else '??',
                'locked': host.locked,
                'locked_by': host.locked_by if host.locked else '',
                'lock_time': host.lock_time if host.locked else '',
            }
        return states
492
493
def Main(argv):
    """Parse the options, initialize lock manager and dispatch proper method.

    The selected operation is validated first, so that invoking the script
    without any operation reports the missing operation rather than a missing
    machine list.

    Args:
      argv: The options with which this script was invoked (without the
        program name).

    Returns:
      0 unless an exception is raised.  Invalid options terminate the
      program through argparse's error handling (SystemExit).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--list', dest='cmd', action='store_const',
                        const='status',
                        help='List current status of all known machines.')
    parser.add_argument('--lock', dest='cmd', action='store_const',
                        const='lock', help='Lock given machine(s).')
    parser.add_argument('--unlock', dest='cmd', action='store_const',
                        const='unlock', help='Unlock given machine(s).')
    parser.add_argument('--status', dest='cmd', action='store_const',
                        const='status',
                        help='List current status of given machine(s).')
    parser.add_argument('--add_machine', dest='cmd', action='store_const',
                        const='add',
                        help='Add machine to local machine server.')
    parser.add_argument('--remove_machine', dest='cmd',
                        action='store_const', const='remove',
                        help='Remove machine from the local machine server.')
    parser.add_argument('--nolocal', dest='local',
                        action='store_false', default=True,
                        help='Do not try to use local machine server.')
    parser.add_argument('--remote', dest='remote',
                        help='machines on which to operate')
    parser.add_argument('--chromeos_root', dest='chromeos_root',
                        required=True,
                        help='ChromeOS root to use for autotest scripts.')
    parser.add_argument('--local_server', dest='local_server', default=None,
                        help='Alternate local autotest server to use.')
    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help='Force lock/unlock of machines, even if not'
                        ' current lock owner.')

    options = parser.parse_args(argv)
    cmd = options.cmd

    # Check the operation before the machine list: with no operation at all,
    # "no machines" would be a misleading complaint.
    if not cmd:
        parser.error('No operation selected (--list, --status, --lock, '
                     '--unlock, --add_machine, --remove_machine).')

    # Only 'status' may run without an explicit machine list.
    if not options.remote and cmd != 'status':
        parser.error('No machines specified for operation.')

    if not os.path.isdir(options.chromeos_root):
        parser.error('Cannot find chromeos_root: %s.' % options.chromeos_root)

    # --remote is a whitespace-separated list of machine names.
    machine_list = options.remote.split() if options.remote else []

    lock_manager = AFELockManager(machine_list, options.force,
                                  options.chromeos_root, options.local_server,
                                  options.local)

    machine_states = lock_manager.GetMachineStates(cmd=cmd)

    if cmd == 'status':
        lock_manager.ListMachineStates(machine_states)

    elif cmd in ('lock', 'unlock'):
        # --force skips the ownership / current-state checks.
        if not lock_manager.force:
            lock_manager.CheckMachineLocks(machine_states, cmd)
        lock_manager.UpdateMachines(cmd == 'lock')

    elif cmd == 'add':
        lock_manager.AddMachinesToLocalServer()

    elif cmd == 'remove':
        lock_manager.RemoveMachinesFromLocalServer()

    return 0
579
580
if __name__ == '__main__':
    # Pass everything after the program name to Main and use its return
    # value as the process exit status.
    sys.exit(Main(sys.argv[1:]))