blob: 40468d70d1d97c5c8542d3e44e39ed27fc6cfa3d [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Fang Deng5d518f42013-08-02 14:04:32 -070019
20from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070021from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib import global_config
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070026from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000027from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070028from autotest_lib.server.hosts import base_servohost
Dan Shi5e2efb72017-02-07 11:40:23 -080029
Fang Deng5d518f42013-08-02 14:04:32 -070030
Simran Basi0739d682015-02-25 16:22:56 -080031# Names of the host attributes in the database that represent the values for
32# the servo_host and servo_port for a servo connected to the DUT.
33SERVO_HOST_ATTR = 'servo_host'
34SERVO_PORT_ATTR = 'servo_port'
Richard Barnettee519dcd2016-08-15 17:37:17 -070035SERVO_BOARD_ATTR = 'servo_board'
Nick Sanders2f3c9852018-10-24 12:10:24 -070036# Model is inferred from host labels.
37SERVO_MODEL_ATTR = 'servo_model'
Kevin Cheng643ce8a2016-09-15 15:42:12 -070038SERVO_SERIAL_ATTR = 'servo_serial'
Prathmesh Prabhucba44292018-08-28 17:44:45 -070039SERVO_ATTR_KEYS = (
40 SERVO_BOARD_ATTR,
41 SERVO_HOST_ATTR,
42 SERVO_PORT_ATTR,
43 SERVO_SERIAL_ATTR,
44)
Simran Basi0739d682015-02-25 16:22:56 -080045
Garry Wangc1288cf2019-12-17 14:58:00 -080046# Timeout value for stop/start servod process.
47SERVOD_TEARDOWN_TIMEOUT = 3
48SERVOD_QUICK_STARTUP_TIMEOUT = 20
49SERVOD_STARTUP_TIMEOUT = 60
50
Garry Wangd7367482020-02-27 13:52:40 -080051# pools that support dual v4. (go/cros-fw-lab-strategy)
52POOLS_SUPPORT_DUAL_V4 = {'faft-cr50',
53 'faft-cr50-experimental',
54 'faft-cr50-tot',
55 'faft-cr50-debug',
56 'faft_cr50_debug'
57 'faft-pd-debug',
58 'faft_pd_debug'}
59
Dan Shi3b2adf62015-09-02 17:46:54 -070060_CONFIG = global_config.global_config
xixuan6cf6d2f2016-01-29 15:29:00 -080061ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
62 'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080063
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070064AUTOTEST_BASE = _CONFIG.get_config_value(
65 'SCHEDULER', 'drone_installation_directory',
66 default='/usr/local/autotest')
67
Otabek Kasimovcc9738e2020-02-14 16:17:15 -080068SERVO_STATE_LABEL_PREFIX = 'servo_state'
69SERVO_STATE_WORKING = 'WORKING'
70SERVO_STATE_BROKEN = 'BROKEN'
71
Fang Deng5d518f42013-08-02 14:04:32 -070072
Garry Wangebc015b2019-06-06 17:45:06 -070073class ServoHost(base_servohost.BaseServoHost):
74 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070075 that with a servo instance for a specific port.
76
77 @type _servo: servo.Servo | None
78 """
Fang Deng5d518f42013-08-02 14:04:32 -070079
Raul E Rangel52ca2e82018-07-03 14:10:14 -060080 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070081
Dan Shie5b3c512014-08-21 12:12:09 -070082 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070083 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070084
xixuan6cf6d2f2016-01-29 15:29:00 -080085 # Ready test function
86 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070087
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080088 # Directory prefix on the servo host where the servod logs are stored.
89 SERVOD_LOG_PREFIX = '/var/log/servod'
90
91 # Exit code to use when symlinks for servod logs are not found.
92 NO_SYMLINKS_CODE = 9
93
94 # Directory in the job's results directory to dump the logs into.
95 LOG_DIR = 'servod'
96
97 # Prefix for joint loglevel files in the logs.
98 JOINT_LOG_PREFIX = 'log'
99
100 # Regex group to extract timestamp from logfile name.
101 TS_GROUP = 'ts'
102
103 # This regex is used to extract the timestamp from servod logs.
104 # files always start with log.
105 TS_RE = (r'log.'
106 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
107 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
108 # The loglevel is optional depending on labstation version.
109 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
110 TS_EXTRACTOR = re.compile(TS_RE)
111
112 # Regex group to extract MCU name from logline in servod logs.
113 MCU_GROUP = 'mcu'
114
115 # Regex group to extract logline from MCU logline in servod logs.
116 LINE_GROUP = 'line'
117
118 # This regex is used to extract the mcu and the line content from an
119 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
120 # Here is an example log-line:
121 #
122 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
123 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
124 #
125 # Here is conceptually how they are formatted:
126 #
127 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
128 # <output>
129 #
130 # The log format starts with a timestamp
131 MCU_RE = (r'[\d\-]+ [\d:,]+ '
132 # The mcu that is logging this is next.
133 r'- (?P<%s>\w+) - '
134 # Next, we have more log outputs before the actual line.
135 # Information about the file line, logging function etc.
136 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
137 # NOTE: if the log format changes, this regex needs to be
138 # adjusted.
139 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
140 # Lastly, we get the MCU's console line.
141 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
142 MCU_EXTRACTOR = re.compile(MCU_RE)
143
144 # Suffix to identify compressed logfiles.
145 COMPRESSION_SUFFIX = '.tbz2'
146
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800147 def _init_attributes(self):
148 self._servo_state = None
149 self.servo_port = None
150 self.servo_board = None
151 self.servo_model = None
152 self.servo_serial = None
153 self._servo = None
154 self._servod_server_proxy = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800155 # Flag to make sure that multiple calls to close do not result in the
156 # logic executing multiple times.
157 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700158
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700159 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700160 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700161 servo_model=None, servo_serial=None, is_in_lab=None,
162 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700163 """Initialize a ServoHost instance.
164
165 A ServoHost instance represents a host that controls a servo.
166
167 @param servo_host: Name of the host where the servod process
168 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600169 @param servo_port: Port the servod process is listening on. Defaults
170 to the SERVOD_PORT environment variable if set,
171 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700172 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700173 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800174 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
175 to None, for which utils.host_is_in_lab_zone will be
176 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700177
178 """
179 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700180 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800181 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700182 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700183 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700184 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700185 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800186
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800187 # The location of the log files on the servo host for this instance.
188 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
189 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700190 # Path of the servo host lock file.
191 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
192 + self.LOCK_FILE_POSTFIX)
193 # File path to declare a reboot request.
194 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
195 + self.REBOOT_FILE_POSTFIX)
196
197 # Lock the servo host if it's an in-lab labstation to prevent other
198 # task to reboot it until current task completes. We also wait and
199 # make sure the labstation is up here, in the case of the labstation is
200 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700201 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700202 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
203 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700204 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700205
Richard Barnette9a26ad62016-06-10 12:03:08 -0700206 self._repair_strategy = (
207 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700208
Richard Barnette9a26ad62016-06-10 12:03:08 -0700209 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700210 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700211
212 Initializes `self._servo` and then verifies that all network
213 connections are working. This will create an ssh tunnel if
214 it's required.
215
216 As a side effect of testing the connection, all signals on the
217 target servo are reset to default values, and the USB stick is
218 set to the neutral (off) position.
219 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700220 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Kuang-che Wu05763f52019-08-30 16:48:21 +0800221 self._servo = servo_obj
Richard Barnette9a26ad62016-06-10 12:03:08 -0700222 timeout, _ = retry.timeout(
223 servo_obj.initialize_dut,
224 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
225 if timeout:
226 raise hosts.AutoservVerifyError(
227 'Servo initialize timed out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700228
229
230 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700231 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700232
233 If we've previously successfully connected to our servo,
234 disconnect any established ssh tunnel, and set `self._servo`
235 back to `None`.
236 """
237 if self._servo:
238 # N.B. This call is safe even without a tunnel:
239 # rpc_server_tracker.disconnect() silently ignores
240 # unknown ports.
241 self.rpc_server_tracker.disconnect(self.servo_port)
242 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700243
Garry Wangc1288cf2019-12-17 14:58:00 -0800244
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800245 def _create_servod_server_proxy(self):
246 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700247
248 @returns: An xmlrpclib.ServerProxy that is connected to the servod
249 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700250 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700251 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
252 return self.rpc_server_tracker.xmlrpc_connect(
253 None, self.servo_port,
254 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800255 timeout_seconds=60,
Allen Li556f4532018-12-03 18:11:23 -0800256 request_timeout_seconds=3600)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700257 else:
258 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
259 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700260
261
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800262 def get_servod_server_proxy(self):
263 """Return a cached proxy if exists; otherwise, create a new one.
264
265 @returns: An xmlrpclib.ServerProxy that is connected to the servod
266 server on the host.
267 """
268 # Single-threaded execution, no race
269 if self._servod_server_proxy is None:
270 self._servod_server_proxy = self._create_servod_server_proxy()
271 return self._servod_server_proxy
272
273
Richard Barnette1edbb162016-11-01 11:47:50 -0700274 def verify(self, silent=False):
275 """Update the servo host and verify it's in a good state.
276
277 @param silent If true, suppress logging in `status.log`.
278 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700279 message = 'Beginning verify for servo host %s port %s serial %s'
280 message %= (self.hostname, self.servo_port, self.servo_serial)
281 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700282 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700283 self._repair_strategy.verify(self, silent)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800284 self._servo_state = SERVO_STATE_WORKING
285 self.record('INFO', None, None, 'ServoHost verify set servo_state as WORKING')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700286 except:
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800287 self._servo_state = SERVO_STATE_BROKEN
288 self.record('INFO', None, None, 'ServoHost verify set servo_state as BROKEN')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700289 self.disconnect_servo()
Garry Wangc1288cf2019-12-17 14:58:00 -0800290 self.stop_servod()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700291 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700292
293
Richard Barnette1edbb162016-11-01 11:47:50 -0700294 def repair(self, silent=False):
295 """Attempt to repair servo host.
296
297 @param silent If true, suppress logging in `status.log`.
298 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700299 message = 'Beginning repair for servo host %s port %s serial %s'
300 message %= (self.hostname, self.servo_port, self.servo_serial)
301 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700302 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700303 self._repair_strategy.repair(self, silent)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800304 self._servo_state = SERVO_STATE_WORKING
305 self.record('INFO', None, None, 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700306 # If target is a labstation then try to withdraw any existing
307 # reboot request created by this servo because it passed repair.
308 if self.is_labstation():
309 self.withdraw_reboot_request()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700310 except:
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800311 self._servo_state = SERVO_STATE_BROKEN
312 self.record('INFO', None, None, 'ServoHost repair set servo_state as BROKEN')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700313 self.disconnect_servo()
Garry Wangc1288cf2019-12-17 14:58:00 -0800314 self.stop_servod()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700315 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700316
317
Dan Shi4d478522014-02-14 13:46:32 -0800318 def get_servo(self):
319 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700320
Dan Shi4d478522014-02-14 13:46:32 -0800321 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700322 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700323 """
Dan Shi4d478522014-02-14 13:46:32 -0800324 return self._servo
325
326
Garry Wang79e9af62019-06-12 15:19:19 -0700327 def request_reboot(self):
328 """Request servohost to be rebooted when it's safe to by touch a file.
329 """
330 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700331 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700332 self.run('touch %s' % self._reboot_file, ignore_status=True)
333
334
Garry Wang464ff1e2019-07-18 17:20:34 -0700335 def withdraw_reboot_request(self):
336 """Withdraw a servohost reboot request if exists by remove the flag
337 file.
338 """
339 logging.debug('Withdrawing request to reboot servohost %s that created'
340 ' by servo with port # %s if exists.',
341 self.hostname, self.servo_port)
342 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
343
344
Garry Wangc1288cf2019-12-17 14:58:00 -0800345 def start_servod(self, quick_startup=False):
346 """Start the servod process on servohost.
347 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800348 # Skip if running on the localhost.(crbug.com/1038168)
349 if self.is_localhost():
350 logging.debug("Servohost is a localhost, skipping start servod.")
351 return
352
353 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800354 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800355 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800356 if self.servo_model:
357 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800358 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800359 logging.warning('Board for DUT is unknown; starting servod'
360 ' assuming a pre-configured board.')
361
362 cmd += ' PORT=%d' % self.servo_port
363 if self.servo_serial:
364 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800365
366 # Start servod with dual_v4 if the DUT/servo from designated pools.
367 dut_host_info = self.get_dut_host_info()
368 if dut_host_info:
369 if bool(dut_host_info.pools & POOLS_SUPPORT_DUAL_V4):
370 logging.debug('The DUT is detected in following designated'
371 ' pools %s,starting servod with DUAL_V4 option.',
372 POOLS_SUPPORT_DUAL_V4)
373 cmd += ' DUAL_V4=1'
374
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800375 # Remove the symbolic links from the logs. This helps ensure that
376 # a failed servod instantiation does not cause us to grab old logs
377 # by mistake.
378 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800379 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800380
381 # There's a lag between when `start servod` completes and when
382 # the _ServodConnectionVerifier trigger can actually succeed.
383 # The call to time.sleep() below gives time to make sure that
384 # the trigger won't fail after we return.
385
386 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
387 # But in the rare case all servo on a labstation are in heavy use they
388 # may take ~30 seconds. So the timeout value will double these value,
389 # and we'll try quick start up when first time initialize servohost,
390 # and use standard start up timeout in repair.
391 if quick_startup:
392 timeout = SERVOD_QUICK_STARTUP_TIMEOUT
393 else:
394 timeout = SERVOD_STARTUP_TIMEOUT
395 logging.debug('Wait %s seconds for servod process fully up.', timeout)
396 time.sleep(timeout)
397
398
399 def stop_servod(self):
400 """Stop the servod process on servohost.
401 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800402 # Skip if running on the localhost.(crbug.com/1038168)
403 if self.is_localhost():
404 logging.debug("Servohost is a localhost, skipping stop servod.")
405 return
406
Garry Wangc1288cf2019-12-17 14:58:00 -0800407 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800408 self.run('stop servod PORT=%d' % self.servo_port,
409 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800410 logging.debug('Wait %s seconds for servod process fully teardown.',
411 SERVOD_TEARDOWN_TIMEOUT)
412 time.sleep(SERVOD_TEARDOWN_TIMEOUT)
413
414
415 def restart_servod(self, quick_startup=False):
416 """Restart the servod process on servohost.
417 """
418 self.stop_servod()
419 self.start_servod(quick_startup)
420
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800421 def _extract_compressed_logs(self, logdir, relevant_files):
422 """Decompress servod logs in |logdir|.
423
424 @param logdir: directory containing compressed servod logs.
425 @param relevant_files: list of files in |logdir| to consider.
426
427 @returns: tuple, (tarfiles, files) where
428 tarfiles: list of the compressed filenames that have been
429 extracted and deleted
430 files: list of the uncompressed files that were generated
431 """
432 # For all tar-files, first extract them to the directory, and
433 # then let the common flow handle them.
434 tarfiles = [cf for cf in relevant_files if
435 cf.endswith(self.COMPRESSION_SUFFIX)]
436 files = []
437 for f in tarfiles:
438 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
439 with tarfile.open(f) as tf:
440 # Each tarfile has only one member, as
441 # that's the compressed log.
442 member = tf.members[0]
443 # Manipulate so that it only extracts the basename, and not
444 # the directories etc.
445 member.name = norm_name
446 files.append(os.path.join(logdir, member.name))
447 tf.extract(member, logdir)
448 # File has been extracted: remove the compressed file.
449 os.remove(f)
450 return tarfiles, files
451
452 def _extract_mcu_logs(self, log_subdir):
453 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
454
455 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
456 lines from the logs to generate invidiual console logs e.g. after
457 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
458 those MCUs had any console input/output.
459
460 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
461 """
462 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
463 # files
464 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
465 if not os.path.exists(mcu_lines_file):
466 logging.info('No DEBUG logs found to extract MCU logs from.')
467 return
468 mcu_files = {}
469 mcu_file_template = '%s.txt'
470 with open(mcu_lines_file, 'r') as f:
471 for line in f:
472 match = self.MCU_EXTRACTOR.match(line)
473 if match:
474 mcu = match.group(self.MCU_GROUP).lower()
475 line = match.group(self.LINE_GROUP)
476 if mcu not in mcu_files:
477 mcu_file = os.path.join(log_subdir,
478 mcu_file_template % mcu)
479 mcu_files[mcu] = open(mcu_file, 'a')
480 fd = mcu_files[mcu]
481 fd.write(line + '\n')
482 for f in mcu_files:
483 mcu_files[f].close()
484
485
486 def remove_latest_log_symlinks(self):
487 """Remove the conveninence symlinks 'latest' servod logs."""
488 symlink_wildcard = '%s/latest*' % self.remote_log_dir
489 cmd = 'rm ' + symlink_wildcard
490 self.run(cmd, stderr_tee=None, ignore_status=True)
491
492 def grab_logs(self, outdir):
493 """Retrieve logs from servo_host to |outdir|/servod_{port}.{ts}/.
494
495 This method first collects all logs on the servo_host side pertaining
496 to this servod instance (port, instatiation). It glues them together
497 into combined log.[level].txt files and extracts all available MCU
498 console I/O from the logs into individual files e.g. servo_v4.txt
499
500 All the output can be found in a directory inside |outdir| that
501 this generates based on |LOG_DIR|, the servod port, and the instance
502 timestamp on the servo_host side.
503
504 @param outdir: directory to create a subdirectory into to place the
505 servod logs into.
506 """
507 # First, extract the timestamp. This cmd gives the real filename of
508 # the latest aka current log file.
509 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
510 'then realpath %(dir)s/latest.DEBUG;'
511 'elif [ -f %(dir)s/latest ];'
512 'then realpath %(dir)s/latest;'
513 'else exit %(code)d;'
514 'fi' % {'dir': self.remote_log_dir,
515 'code': self.NO_SYMLINKS_CODE})
516 res = self.run(cmd, stderr_tee=None, ignore_status=True)
517 if res.exit_status != 0:
518 if res.exit_status == self.NO_SYMLINKS_CODE:
519 logging.warning('servod log latest symlinks not found. '
520 'This is likely due to an error starting up '
521 'servod. Ignoring..')
522 else:
523 logging.warning('Failed to find servod logs on servo host.')
524 logging.warning(res.stderr.strip())
525 return
526 fname = os.path.basename(res.stdout.strip())
527 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
528 instance_ts = self.TS_EXTRACTOR.match(fname).group(self.TS_GROUP)
529 # Create the local results log dir.
530 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
531 str(self.servo_port),
532 instance_ts))
533 logging.info('Saving servod logs to %s.', log_dir)
534 os.mkdir(log_dir)
535 # Now, get all files with that timestamp.
536 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
537 instance_ts)
538 res = self.run(cmd, stderr_tee=None, ignore_status=True)
539 files = res.stdout.strip().split()
540 try:
541 self.get_file(files, log_dir, try_rsync=False)
542
543 except error.AutoservRunError as e:
544 result = e.result_obj
545 if result.exit_status != 0:
546 stderr = result.stderr.strip()
547 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
548 stderr or '\n%s' % result)
549 return
550 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
551 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
552 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
553 # Create the joint files for each loglevel. i.e log.DEBUG
554 joint_file = self.JOINT_LOG_PREFIX
555 if level_name:
556 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
557 # This helps with some online tools to avoid complaints about an
558 # unknown filetype.
559 joint_file = joint_file + '.txt'
560 joint_path = os.path.join(log_dir, joint_file)
561 files = [f for f in local_files if level_name in f]
562 if not files:
563 # TODO(crrev.com/c/1793030): remove no-level case once CL
564 # is pushed
565 continue
566 # Extract compressed logs if any.
567 compressed, extracted = self._extract_compressed_logs(log_dir,
568 files)
569 files = list(set(files) - set(compressed))
570 files.extend(extracted)
571 # Need to sort. As they all share the same timestamp, and
572 # loglevel, the index itself is sufficient. The highest index
573 # is the oldest file, therefore we need a descending sort.
574 def sortkey(f, level=level_name):
575 """Custom sortkey to sort based on rotation number int."""
576 if f.endswith(level_name): return 0
577 return int(f.split('.')[-1])
578
579 files.sort(reverse=True, key=sortkey)
580 # Just rename the first file rather than building from scratch.
581 os.rename(files[0], joint_path)
582 with open(joint_path, 'a') as joint_f:
583 for logfile in files[1:]:
584 # Transfer the file to the joint file line by line.
585 with open(logfile, 'r') as log_f:
586 for line in log_f:
587 joint_f.write(line)
588 # File has been written over. Delete safely.
589 os.remove(logfile)
590 # Need to remove all files form |local_files| so we don't
591 # analyze them again.
592 local_files = list(set(local_files) - set(files) - set(compressed))
593 # Lastly, extract MCU logs from the joint logs.
594 self._extract_mcu_logs(log_dir)
595
Garry Wangc1288cf2019-12-17 14:58:00 -0800596
Garry Wang79e9af62019-06-12 15:19:19 -0700597 def _lock(self):
598 """lock servohost by touching a file.
599 """
600 logging.debug('Locking servohost %s by touching %s file',
601 self.hostname, self._lock_file)
602 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700603 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700604
605
606 def _unlock(self):
607 """Unlock servohost by removing the lock file.
608 """
609 logging.debug('Unlocking servohost by removing %s file',
610 self._lock_file)
611 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700612 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700613
614
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700615 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -0700616 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800617 if self._closed:
618 logging.debug('ServoHost is already closed.')
619 return
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700620 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800621 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -0700622 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800623 self._servo.close(outdir)
624
625 if self.job and not self.is_localhost():
626 # Grab all logs from this servod instance before stopping servod.
627 # TODO(crbug.com/1011516): once enabled, remove the check against
628 # localhost and instead check against log-rotiation enablement.
629 try:
630 self.grab_logs(self.job.resultdir)
631 except error.AutoservRunError as e:
632 logging.info('Failed to grab servo logs due to: %s. '
633 'This error is forgiven.', str(e))
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700634
Garry Wang7c00b0f2019-06-25 17:28:17 -0700635 if self._is_locked:
636 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -0700637 try:
638 self._unlock()
639 except error.AutoservSSHTimeout:
640 logging.error('Unlock servohost failed due to ssh timeout.'
641 ' It may caused by servohost went down during'
642 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -0800643 # We want always stop servod after task to minimum the impact of bad
644 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -0800645 try:
646 self.stop_servod()
647 except error.AutoservRunError as e:
648 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800649 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -0800650
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700651 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800652 # Mark closed.
653 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700654
655
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800656 def get_servo_state(self):
657 return SERVO_STATE_BROKEN if self._servo_state is None else self._servo_state
658
659
Richard Barnetteea3e4602016-06-10 12:36:41 -0700660def make_servo_hostname(dut_hostname):
661 """Given a DUT's hostname, return the hostname of its servo.
662
663 @param dut_hostname: hostname of a DUT.
664
665 @return hostname of the DUT's servo.
666
667 """
668 host_parts = dut_hostname.split('.')
669 host_parts[0] = host_parts[0] + '-servo'
670 return '.'.join(host_parts)
671
672
673def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700674 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700675
676 @param servo_hostname: hostname of the servo host.
677
678 @return True if it's up, False otherwise
679 """
680 # Technically, this duplicates the SSH ping done early in the servo
681 # proxy initialization code. However, this ping ends in a couple
682 # seconds when if fails, rather than the 60 seconds it takes to decide
683 # that an SSH ping has timed out. Specifically, that timeout happens
684 # when our servo DNS name resolves, but there is no host at that IP.
685 logging.info('Pinging servo host at %s', servo_hostname)
686 ping_config = ping_runner.PingConfig(
687 servo_hostname, count=3,
688 ignore_result=True, ignore_status=True)
689 return ping_runner.PingRunner().ping(ping_config).received > 0
690
691
Richard Barnettee519dcd2016-08-15 17:37:17 -0700692def _map_afe_board_to_servo_board(afe_board):
693 """Map a board we get from the AFE to a servo appropriate value.
694
695 Many boards are identical to other boards for servo's purposes.
696 This function makes that mapping.
697
698 @param afe_board string board name received from AFE.
699 @return board we expect servo to have.
700
701 """
702 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
703 BOARD_MAP = {'gizmo': 'panther'}
704 mapped_board = afe_board
705 if afe_board in BOARD_MAP:
706 mapped_board = BOARD_MAP[afe_board]
707 else:
708 for suffix in KNOWN_SUFFIXES:
709 if afe_board.endswith(suffix):
710 mapped_board = afe_board[0:-len(suffix)]
711 break
712 if mapped_board != afe_board:
713 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
714 return mapped_board
715
716
Prathmesh Prabhub4810232018-09-07 13:24:08 -0700717def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700718 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700719
Richard Barnetteea3e4602016-06-10 12:36:41 -0700720 @param dut_host Instance of `Host` on which to find the servo
721 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -0700722 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700723 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700724 info = dut_host.host_info_store.get()
725 servo_args = {k: v for k, v in info.attributes.iteritems()
726 if k in SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -0700727
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700728 if SERVO_PORT_ATTR in servo_args:
729 try:
730 servo_args[SERVO_PORT_ATTR] = int(servo_args[SERVO_PORT_ATTR])
731 except ValueError:
732 logging.error('servo port is not an int: %s',
733 servo_args[SERVO_PORT_ATTR])
734 # Reset servo_args because we don't want to use an invalid port.
735 servo_args.pop(SERVO_HOST_ATTR, None)
736
737 if info.board:
738 servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(info.board)
Nick Sanders2f3c9852018-10-24 12:10:24 -0700739 if info.model:
740 servo_args[SERVO_MODEL_ATTR] = info.model
Prathmesh Prabhu6f5f6362018-09-05 17:20:31 -0700741 return servo_args if SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -0700742
743
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700744def _tweak_args_for_ssp_moblab(servo_args):
745 if servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']:
746 servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
747 'SSP', 'host_container_ip', type=str, default=None)
748
749
Dan Shi023aae32016-05-25 11:13:01 -0700750def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -0800751 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700752 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -0800753
Richard Barnette9a26ad62016-06-10 12:03:08 -0700754 This function attempts to create and verify or repair a `ServoHost`
755 object for a servo connected to the given `dut`, subject to various
756 constraints imposed by the parameters:
757 * When the `servo_args` parameter is not `None`, a servo
758 host must be created, and must be checked with `repair()`.
759 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
760 true:
761 * If `try_servo_repair` is true, then create a servo host and
762 check it with `repair()`.
763 * Otherwise, if the servo responds to `ping` then create a
764 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -0800765
Richard Barnette9a26ad62016-06-10 12:03:08 -0700766 In cases where `servo_args` was not `None`, repair failure
767 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700768 are logged and then discarded. Note that this only happens in cases
769 where we're called from a test (not special task) control file that
770 has an explicit dependency on servo. In that case, we require that
771 repair not write to `status.log`, so as to avoid polluting test
772 results.
773
774 TODO(jrbarnette): The special handling for servo in test control
775 files is a thorn in my flesh; I dearly hope to see it cut out before
776 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700777
778 Parameters for a servo host consist of a host name, port number, and
779 DUT board, and are determined from one of these sources, in order of
780 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -0700781 * Servo attributes from the `dut` parameter take precedence over
782 all other sources of information.
783 * If a DNS entry for the servo based on the DUT hostname exists in
784 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -0700785 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700786 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -0700787 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700788
789 @param dut An instance of `Host` from which to take
790 servo parameters (if available).
791 @param servo_args A dictionary with servo parameters to use if
792 they can't be found from `dut`. If this
793 argument is supplied, unrepaired exceptions
794 from `verify()` will be passed back to the
795 caller.
796 @param try_lab_servo If not true, servo host creation will be
797 skipped unless otherwise required by the
798 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700799 @param try_servo_repair If true, check a servo host with
800 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -0800801
802 @returns: A ServoHost object or None. See comments above.
803
804 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700805 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700806 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -0700807 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -0700808 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700809 if utils.in_moblab_ssp():
810 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700811 logging.debug(
812 'Overriding provided servo_args (%s) with arguments'
813 ' determined from the host (%s)',
814 servo_args,
815 servo_args_override,
816 )
Richard Barnetteea3e4602016-06-10 12:36:41 -0700817 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700818
Richard Barnetteea3e4602016-06-10 12:36:41 -0700819 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700820 logging.debug('No servo_args provided, and failed to find overrides.')
Richard Barnetteea3e4602016-06-10 12:36:41 -0700821 return None
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700822 if SERVO_HOST_ATTR not in servo_args:
823 logging.debug('%s attribute missing from servo_args: %s',
824 SERVO_HOST_ATTR, servo_args)
825 return None
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700826 if (not servo_dependency and not try_servo_repair and
Richard Barnette9a26ad62016-06-10 12:03:08 -0700827 not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700828 logging.debug('ServoHost is not up.')
Dan Shibbb0cb62014-03-24 17:50:57 -0700829 return None
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700830
Garry Wangebc015b2019-06-06 17:45:06 -0700831 newhost = ServoHost(**servo_args)
Garry Wangcdd27b22020-01-13 14:59:11 -0800832 try:
833 newhost.restart_servod(quick_startup=True)
834 except error.AutoservSSHTimeout:
835 logging.warning("Restart servod failed due ssh connection "
836 "to servohost timed out. This error is forgiven"
837 " here, we will retry in servo repair process.")
838 except error.AutoservRunError as e:
839 logging.warning("Restart servod failed due to:\n%s\n"
840 "This error is forgiven here, we will retry"
841 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -0700842
Gregory Nisbetde13e2a2019-12-09 22:44:00 -0800843 # TODO(gregorynisbet): Clean all of this up.
844 logging.debug('create_servo_host: attempt to set info store on '
845 'servo host')
846 try:
847 if dut_host_info is None:
848 logging.debug('create_servo_host: dut_host_info is '
849 'None, skipping')
850 else:
851 newhost.set_dut_host_info(dut_host_info)
852 logging.debug('create_servo_host: successfully set info '
853 'store')
854 except Exception:
855 logging.error("create_servo_host: (%s)", traceback.format_exc())
856
Richard Barnette9a26ad62016-06-10 12:03:08 -0700857 # Note that the logic of repair() includes everything done
858 # by verify(). It's sufficient to call one or the other;
859 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700860 if servo_dependency:
861 newhost.repair(silent=True)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700862 return newhost
863
864 if try_servo_repair:
865 try:
866 newhost.repair()
867 except Exception:
868 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700869 else:
870 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700871 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700872 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700873 logging.exception('servo verify failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700874 return newhost