# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib.cros import autoupdater
from autotest_lib.server import autoserv_parser
from autotest_lib.server import site_host_attributes
from autotest_lib.server import site_remote_power
from autotest_lib.server.cros import servo
from autotest_lib.server.hosts import remote


def make_ssh_command(user='root', port=22, opts='', hosts_file=None,
                     connect_timeout=None, alive_interval=None):
    """Override default make_ssh_command to use options tuned for Chrome OS.

    Tuning changes:
      - ConnectTimeout=30; maximum of 30 seconds allowed for an SSH
        connection failure.  Consistency with remote_access.sh.

      - ServerAliveInterval=180; which causes SSH to ping connection every
        180 seconds.  In conjunction with ServerAliveCountMax ensures that
        if the connection dies, Autotest will bail out quickly.  Originally
        tried 60 secs, but saw frequent job ABORTS where the test completed
        successfully.

      - ServerAliveCountMax=3; consistency with remote_access.sh.

      - ConnectionAttempts=4; reduce flakiness in connection errors;
        consistency with remote_access.sh.

      - UserKnownHostsFile=/dev/null; we don't care about the keys.  Host
        keys change with every new installation, don't waste memory/space
        saving them.

      - SSH protocol forced to 2; needed for ServerAliveInterval.

    @param user: Login name, passed to ssh via -l.
    @param port: TCP port number, passed to ssh via -p (formatted with %d).
    @param opts: Extra command line options, inserted verbatim right after
            '-a -x'.
    @param hosts_file: Accepted but ignored; the known-hosts file is always
            /dev/null.
    @param connect_timeout: Accepted but ignored; ConnectTimeout is always 30.
    @param alive_interval: Accepted but ignored; ServerAliveInterval is
            always 180.

    @returns The ssh command line as a single string, without a target host.
    """
    # The three ignored parameters are kept in the signature so this
    # function stays call-compatible with whatever it overrides.
    base_command = ('/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no'
                    ' -o UserKnownHostsFile=/dev/null -o BatchMode=yes'
                    ' -o ConnectTimeout=30 -o ServerAliveInterval=180'
                    ' -o ServerAliveCountMax=3 -o ConnectionAttempts=4'
                    ' -o Protocol=2 -l %s -p %d')
    return base_command % (opts, user, port)


class SiteHost(remote.RemoteHost):
    """Chromium OS specific subclass of Host."""

    _parser = autoserv_parser.autoserv_parser

    # Time to wait for new kernel to be marked successful.
    _KERNEL_UPDATE_TIMEOUT = 60

    # Ephemeral file to indicate that an update has just occurred.
    _JUST_UPDATED_FLAG = '/tmp/just_updated'

    def _initialize(self, hostname, require_servo=False, *args, **dargs):
        """Initialize superclasses, and |self.servo|.

        For creating the host servo object, there are three
        possibilities:  First, if the host is a lab system known to
        have a servo board, we connect to that servo unconditionally.
        Second, if we're called from a control file that requires
        servo features for testing, it will pass |require_servo| set
        to |True|, and we will start a local servod.  If neither of
        these cases apply, |self.servo| will be |None|.

        @param hostname: Name of the host; forwarded to the superclass and
                used to look up a lab servo.
        @param require_servo: When true and no lab servo was found, create
                a default servo.Servo() instance.
        @param args: Extra positional arguments for the superclass.
        @param dargs: Extra keyword arguments for the superclass.
        """
        # NOTE(review): hostname is forwarded by keyword, so any extra
        # positional argument in |args| would collide with it in the
        # superclass call -- confirm no caller passes positional extras.
        super(SiteHost, self)._initialize(hostname=hostname,
                                          *args, **dargs)
        self.servo = servo.Servo.get_lab_servo(hostname)
        if not self.servo and require_servo:
            self.servo = servo.Servo()


    def machine_install(self, update_url=None, force_update=False):
        """Update the host from |update_url| and verify the new kernel.

        When the updater reports that an update was applied, the host is
        rebooted, the version and the active kernel partition are checked,
        and the just-updated flag file is created.

        @param update_url: URL to update from.  When empty, falls back to
                the image given in the autoserv parser options.
        @param force_update: Passed through to the updater's run_update().

        @raises autoupdater.ChromiumOSError: if no update URL is available,
                if the inactive kernel has lower priority than the active
                one after the update, if the previously inactive kernel is
                not active after reboot, or if the new kernel is not marked
                successful within _KERNEL_UPDATE_TIMEOUT.
        """
        if not update_url and self._parser.options.image:
            update_url = self._parser.options.image
        elif not update_url:
            raise autoupdater.ChromiumOSError(
                'Update failed. No update URL provided.')

        # Attempt to update the system.
        updater = autoupdater.ChromiumOSUpdater(update_url, host=self)
        if updater.run_update(force_update):
            # Figure out active and inactive kernel.
            active_kernel, inactive_kernel = updater.get_kernel_state()

            # Ensure inactive kernel has higher priority than active.
            if (updater.get_kernel_priority(inactive_kernel)
                    < updater.get_kernel_priority(active_kernel)):
                raise autoupdater.ChromiumOSError(
                    'Update failed. The priority of the inactive kernel'
                    ' partition is less than that of the active kernel'
                    ' partition.')

            # Updater has returned, successfully, reboot the host.
            self.reboot(timeout=60, wait=True)

            # Following the reboot, verify the correct version.
            updater.check_version()

            # Figure out newly active kernel.
            new_active_kernel, _ = updater.get_kernel_state()

            # Ensure that previously inactive kernel is now the active kernel.
            if new_active_kernel != inactive_kernel:
                raise autoupdater.ChromiumOSError(
                    'Update failed. New kernel partition is not active after'
                    ' boot.')

            host_attributes = site_host_attributes.HostAttributes(
                self.hostname)
            if host_attributes.has_chromeos_firmware:
                # Wait until tries == 0 and success, or until timeout.
                utils.poll_for_condition(
                    lambda: (updater.get_kernel_tries(new_active_kernel) == 0
                             and updater.get_kernel_success(
                                 new_active_kernel)),
                    exception=autoupdater.ChromiumOSError(
                        'Update failed. Timed out waiting for system to mark'
                        ' new kernel as successful.'),
                    timeout=self._KERNEL_UPDATE_TIMEOUT, sleep_interval=5)

            # TODO(dalecurtis): Hack for R12 builds to make sure BVT runs of
            # platform_Shutdown pass correctly.
            if updater.update_version.startswith('0.12'):
                self.reboot(timeout=60, wait=True)

            # Mark host as recently updated.  Hosts are rebooted at the end of
            # every test cycle which will remove the file.
            self.run('touch %s' % self._JUST_UPDATED_FLAG)

        # Clean up any old autotest directories which may be lying around.
        # NOTE(review): placed at method level, so it runs whether or not an
        # update was applied -- confirm this nesting against upstream.
        for path in global_config.global_config.get_config_value(
                'AUTOSERV', 'client_autodir_paths', type=list):
            self.run('rm -rf ' + path)


    def has_just_updated(self):
        """Indicates whether the host was updated within this boot.

        @returns True if the just-updated flag file exists on the host.
        """
        # Check for the existence of the just updated flag file; the remote
        # shell prints 'T' or 'F' so the result can be read from stdout.
        return self.run(
            '[ -f %s ] && echo T || echo F'
            % self._JUST_UPDATED_FLAG).stdout.strip() == 'T'


    def cleanup(self):
        """Special cleanup method to make sure hosts always get power back."""
        super(SiteHost, self).cleanup()
        remote_power = site_remote_power.RemotePower(self.hostname)
        # Only switch power on when the RemotePower object is truthy.
        if remote_power:
            remote_power.set_power_on()


    def verify_software(self):
        """Ensure the stateful partition has space for Autotest and updates.

        Similar to what is done by AbstractSSH, except instead of checking the
        Autotest installation path, just check the stateful partition.

        Checking the stateful partition is preferable in case it has been
        wiped, resulting in an Autotest installation path which doesn't exist
        and isn't writable.  We still want to pass verify in this state since
        the partition will be recovered with the next install.
        """
        super(SiteHost, self).verify_software()
        # Required space comes from SERVER/gb_diskspace_required, falling
        # back to 20 when the config value is not set.
        self.check_diskspace(
            '/mnt/stateful_partition',
            global_config.global_config.get_config_value(
                'SERVER', 'gb_diskspace_required', type=int,
                default=20))