blob: fdd97d18bc15797387844a322616b129321b3661 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Fang Deng5d518f42013-08-02 14:04:32 -070019
20from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070021from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070022from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070023from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070026from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000027from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070028from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070029from autotest_lib.server.hosts import servo_constants
30from autotest_lib.client.common_lib import global_config
Garry Wangd7367482020-02-27 13:52:40 -080031
Dan Shi3b2adf62015-09-02 17:46:54 -070032_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070033
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070034
Garry Wangebc015b2019-06-06 17:45:06 -070035class ServoHost(base_servohost.BaseServoHost):
36 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070037 that with a servo instance for a specific port.
38
39 @type _servo: servo.Servo | None
40 """
Fang Deng5d518f42013-08-02 14:04:32 -070041
Raul E Rangel52ca2e82018-07-03 14:10:14 -060042 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070043
Dan Shie5b3c512014-08-21 12:12:09 -070044 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070045 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070046
xixuan6cf6d2f2016-01-29 15:29:00 -080047 # Ready test function
48 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070049
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080050 # Directory prefix on the servo host where the servod logs are stored.
51 SERVOD_LOG_PREFIX = '/var/log/servod'
52
53 # Exit code to use when symlinks for servod logs are not found.
54 NO_SYMLINKS_CODE = 9
55
56 # Directory in the job's results directory to dump the logs into.
57 LOG_DIR = 'servod'
58
59 # Prefix for joint loglevel files in the logs.
60 JOINT_LOG_PREFIX = 'log'
61
62 # Regex group to extract timestamp from logfile name.
63 TS_GROUP = 'ts'
64
65 # This regex is used to extract the timestamp from servod logs.
66 # files always start with log.
67 TS_RE = (r'log.'
68 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
69 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
70 # The loglevel is optional depending on labstation version.
71 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
72 TS_EXTRACTOR = re.compile(TS_RE)
73
74 # Regex group to extract MCU name from logline in servod logs.
75 MCU_GROUP = 'mcu'
76
77 # Regex group to extract logline from MCU logline in servod logs.
78 LINE_GROUP = 'line'
79
80 # This regex is used to extract the mcu and the line content from an
81 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
82 # Here is an example log-line:
83 #
84 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
85 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
86 #
87 # Here is conceptually how they are formatted:
88 #
89 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
90 # <output>
91 #
92 # The log format starts with a timestamp
93 MCU_RE = (r'[\d\-]+ [\d:,]+ '
94 # The mcu that is logging this is next.
95 r'- (?P<%s>\w+) - '
96 # Next, we have more log outputs before the actual line.
97 # Information about the file line, logging function etc.
98 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
99 # NOTE: if the log format changes, this regex needs to be
100 # adjusted.
101 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
102 # Lastly, we get the MCU's console line.
103 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
104 MCU_EXTRACTOR = re.compile(MCU_RE)
105
106 # Suffix to identify compressed logfiles.
107 COMPRESSION_SUFFIX = '.tbz2'
108
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700109 # A suffix to mark servod log directories that came from instance that
110 # ran during this servo_host, but are not the last one running e.g. when
111 # an instance (on purpose, or due to a bug) restarted in the middle of the
112 # run.
113 OLD_LOG_SUFFIX = 'old'
114
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800115 def _init_attributes(self):
116 self._servo_state = None
117 self.servo_port = None
118 self.servo_board = None
119 self.servo_model = None
120 self.servo_serial = None
121 self._servo = None
122 self._servod_server_proxy = None
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700123 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800124 # Flag to make sure that multiple calls to close do not result in the
125 # logic executing multiple times.
126 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700127
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700128 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700129 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700130 servo_model=None, servo_serial=None, is_in_lab=None,
131 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700132 """Initialize a ServoHost instance.
133
134 A ServoHost instance represents a host that controls a servo.
135
136 @param servo_host: Name of the host where the servod process
137 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600138 @param servo_port: Port the servod process is listening on. Defaults
139 to the SERVOD_PORT environment variable if set,
140 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700141 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700142 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800143 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
144 to None, for which utils.host_is_in_lab_zone will be
145 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700146
147 """
148 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700149 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800150 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700151 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700152 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700153 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700154 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800155
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800156 # The location of the log files on the servo host for this instance.
157 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
158 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700159 # Path of the servo host lock file.
160 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
161 + self.LOCK_FILE_POSTFIX)
162 # File path to declare a reboot request.
163 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
164 + self.REBOOT_FILE_POSTFIX)
165
166 # Lock the servo host if it's an in-lab labstation to prevent other
167 # task to reboot it until current task completes. We also wait and
168 # make sure the labstation is up here, in the case of the labstation is
169 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700170 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700171 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
172 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700173 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700174
Richard Barnette9a26ad62016-06-10 12:03:08 -0700175 self._repair_strategy = (
176 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700177
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700178 def __str__(self):
179 return "<%s '%s:%s'>" % (
180 type(self).__name__, self.hostname, self.servo_port)
181
Richard Barnette9a26ad62016-06-10 12:03:08 -0700182 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700183 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700184
185 Initializes `self._servo` and then verifies that all network
186 connections are working. This will create an ssh tunnel if
187 it's required.
188
189 As a side effect of testing the connection, all signals on the
190 target servo are reset to default values, and the USB stick is
191 set to the neutral (off) position.
192 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700193 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Kuang-che Wu05763f52019-08-30 16:48:21 +0800194 self._servo = servo_obj
Richard Barnette9a26ad62016-06-10 12:03:08 -0700195 timeout, _ = retry.timeout(
196 servo_obj.initialize_dut,
197 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
198 if timeout:
199 raise hosts.AutoservVerifyError(
200 'Servo initialize timed out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700201
202
203 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700204 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700205
206 If we've previously successfully connected to our servo,
207 disconnect any established ssh tunnel, and set `self._servo`
208 back to `None`.
209 """
210 if self._servo:
211 # N.B. This call is safe even without a tunnel:
212 # rpc_server_tracker.disconnect() silently ignores
213 # unknown ports.
214 self.rpc_server_tracker.disconnect(self.servo_port)
215 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700216
Garry Wangc1288cf2019-12-17 14:58:00 -0800217
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800218 def _create_servod_server_proxy(self):
219 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700220
221 @returns: An xmlrpclib.ServerProxy that is connected to the servod
222 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700223 """
Garry Wang11b5e872020-03-11 15:14:08 -0700224 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
225 and not self.is_localhost()):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700226 return self.rpc_server_tracker.xmlrpc_connect(
227 None, self.servo_port,
228 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800229 timeout_seconds=60,
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700230 request_timeout_seconds=3600,
231 server_desc=str(self))
Richard Barnette9a26ad62016-06-10 12:03:08 -0700232 else:
233 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
234 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700235
236
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800237 def get_servod_server_proxy(self):
238 """Return a cached proxy if exists; otherwise, create a new one.
239
240 @returns: An xmlrpclib.ServerProxy that is connected to the servod
241 server on the host.
242 """
243 # Single-threaded execution, no race
244 if self._servod_server_proxy is None:
245 self._servod_server_proxy = self._create_servod_server_proxy()
246 return self._servod_server_proxy
247
248
Richard Barnette1edbb162016-11-01 11:47:50 -0700249 def verify(self, silent=False):
250 """Update the servo host and verify it's in a good state.
251
252 @param silent If true, suppress logging in `status.log`.
253 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700254 message = 'Beginning verify for servo host %s port %s serial %s'
255 message %= (self.hostname, self.servo_port, self.servo_serial)
256 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700257 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700258 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700259 self._servo_state = servo_constants.SERVO_STATE_WORKING
260 self.record('INFO', None, None,
261 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700262 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700263 self._servo_state = servo_constants.SERVO_STATE_BROKEN
264 self.record('INFO', None, None,
265 'ServoHost verify set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700266 if self._is_critical_error(e):
267 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700268
269
Garry Wang7b0e1b72020-03-25 19:08:59 -0700270 def get_image_name_from_usbkey(self, usbkey_dev):
271 """Mount usb drive and check ChromeOS image name on it if there is
272 one. This method assumes the image_usbkey_direction is already set
273 to servo side.
274
275 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
276
277 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
278 or empty string if no test image detected, or unexpected
279 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700280 """
Garry Wang70e5d062020-04-03 18:01:05 -0700281 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700282 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
283 unmount_cmd = 'umount %s' % usb_mount_path
284 # ChromeOS root fs is in /dev/sdx3
285 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
286 # Unmount if there is an existing stale mount.
287 self.run(unmount_cmd, ignore_status=True)
288 # Create if the mount point is not existing.
289 self.run('mkdir -p %s' % usb_mount_path)
290 try:
291 # Attempt to mount the usb drive.
292 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700293 if mount_result.exit_status != 0:
294 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700295 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700296
297 release_content = self.run(
298 'cat %s/etc/lsb-release' % usb_mount_path,
299 ignore_status=True).stdout.strip()
300
301 if not re.search(r'RELEASE_TRACK=.*test', release_content):
302 logging.info('The image on usbkey is not a test image')
303 return ''
304
305 return lsbrelease_utils.get_chromeos_release_builder_path(
306 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700307 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700308 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700309 self.run(unmount_cmd, ignore_status=True)
310
311
Garry Wang70e5d062020-04-03 18:01:05 -0700312 def _probe_and_validate_usb_dev(self):
313 """This method probe the usb dev path by talking to servo, and then
314 validate the dev path is valid block device to servohost.
315 Possible output:
316 1. Encounter error during probe usb dev, returns empty string.
317 2. probe usb dev completed without error but cannot find usb dev,
318 raise AutoservRepairError.
319 3. probe usb dev find a usb dev path, but failed validation in this
320 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700321
Garry Wang70e5d062020-04-03 18:01:05 -0700322 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
323 if unexpected error occurred during probe.
324 @raises: AutoservRepairError if servo couldn't probe the usb dev path
325 (servo.probe_host_usb_dev() returns empty string), or the dev path is
326 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700327 """
328 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700329 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700330 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700331 except Exception as e:
332 # We don't want any unexpected or transient servo communicating
333 # failure block usb repair, so capture all errors here.
334 logging.error(e, exc_info=True)
335 logging.error('Unexpected error occurred on get usbkey dev path,'
336 ' skipping usbkey validation.')
337 return ''
338
Garry Wang70e5d062020-04-03 18:01:05 -0700339 if usb_dev:
340 # probe_host_usb_dev() sometimes return stale record,
341 # so we need to make sure the path exists in fdisk.
342 validate_cmd = 'fdisk -l | grep %s' % usb_dev
343 resp = self.run(validate_cmd, ignore_status=True)
344 if resp.exit_status == 0:
345 return usb_dev
346
347 logging.error('%s is reported from "image_usbkey_dev" control but'
348 ' not detected from fdisk!', usb_dev)
349
350 raise hosts.AutoservRepairError(
351 'No usbkey detected on servo, the usbkey may be either missing'
352 ' or broken. Please replace usbkey on the servo and retry.',
353 'missing usbkey')
354
355
356 def validate_image_usbkey(self):
357 """This method first validate if there is a recover usbkey on servo
358 that accessible to servohost, and second check if a ChromeOS image is
359 already on the usb drive and return the image_name so we can avoid
360 unnecessary download and flash to the recover usbkey on servo.
361
362 Please note that, there is special error handling logic here:
363 1. If unexpected error happens, we return empty string. So repair
364 actions will not get blocked.
365 2. If no working usbkey present on servo, but no errors, we'll raise
366 AutoservRepairError here.
367
368 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
369 or empty string if no test image detected, or unexpected
370 error occurred.
371 @raises: AutoservRepairError if the usbkey is not detected on servo.
372 """
373 usb_dev = self._probe_and_validate_usb_dev()
374 if usb_dev:
375 return self.get_image_name_from_usbkey(usb_dev)
376 else:
377 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700378
379
Richard Barnette1edbb162016-11-01 11:47:50 -0700380 def repair(self, silent=False):
381 """Attempt to repair servo host.
382
383 @param silent If true, suppress logging in `status.log`.
384 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700385 message = 'Beginning repair for servo host %s port %s serial %s'
386 message %= (self.hostname, self.servo_port, self.servo_serial)
387 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700388 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700389 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700390 self._servo_state = servo_constants.SERVO_STATE_WORKING
391 self.record('INFO', None, None,
392 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700393 # If target is a labstation then try to withdraw any existing
394 # reboot request created by this servo because it passed repair.
395 if self.is_labstation():
396 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700397 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700398 self._servo_state = servo_constants.SERVO_STATE_BROKEN
399 self.record('INFO', None, None,
400 'ServoHost repair set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700401 if self._is_critical_error(e):
402 self.disconnect_servo()
403 self.stop_servod()
404 raise
405
406
407 def _is_critical_error(self, error):
408 if (isinstance(error, hosts.AutoservVerifyDependencyError)
409 and not error.is_critical()):
410 logging.warning('Non-critical verify failure(s) detected during'
411 ' verify/repair servo, servo connection will'
412 ' still up but may not fully functional.'
413 ' Some repair actions and servo depended'
414 ' tests may not run.')
415 return False
416 logging.info('Critical verify failure(s) detected during repair/verify'
417 ' servo. Disconnecting servo and stop servod, all repair '
418 'action and tests that depends on servo will not run.')
419 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700420
421
Dan Shi4d478522014-02-14 13:46:32 -0800422 def get_servo(self):
423 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700424
Dan Shi4d478522014-02-14 13:46:32 -0800425 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700426 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700427 """
Dan Shi4d478522014-02-14 13:46:32 -0800428 return self._servo
429
430
Garry Wang79e9af62019-06-12 15:19:19 -0700431 def request_reboot(self):
432 """Request servohost to be rebooted when it's safe to by touch a file.
433 """
434 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700435 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700436 self.run('touch %s' % self._reboot_file, ignore_status=True)
437
438
Garry Wang464ff1e2019-07-18 17:20:34 -0700439 def withdraw_reboot_request(self):
440 """Withdraw a servohost reboot request if exists by remove the flag
441 file.
442 """
443 logging.debug('Withdrawing request to reboot servohost %s that created'
444 ' by servo with port # %s if exists.',
445 self.hostname, self.servo_port)
446 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
447
448
Garry Wangc1288cf2019-12-17 14:58:00 -0800449 def start_servod(self, quick_startup=False):
450 """Start the servod process on servohost.
451 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800452 # Skip if running on the localhost.(crbug.com/1038168)
453 if self.is_localhost():
454 logging.debug("Servohost is a localhost, skipping start servod.")
455 return
456
457 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800458 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800459 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800460 if self.servo_model:
461 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800462 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800463 logging.warning('Board for DUT is unknown; starting servod'
464 ' assuming a pre-configured board.')
465
466 cmd += ' PORT=%d' % self.servo_port
467 if self.servo_serial:
468 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800469
470 # Start servod with dual_v4 if the DUT/servo from designated pools.
471 dut_host_info = self.get_dut_host_info()
472 if dut_host_info:
Garry Wang11b5e872020-03-11 15:14:08 -0700473 if bool(dut_host_info.pools &
474 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800475 logging.debug('The DUT is detected in following designated'
476 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700477 servo_constants.POOLS_SUPPORT_DUAL_V4)
Garry Wangd7367482020-02-27 13:52:40 -0800478 cmd += ' DUAL_V4=1'
479
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800480 # Remove the symbolic links from the logs. This helps ensure that
481 # a failed servod instantiation does not cause us to grab old logs
482 # by mistake.
483 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800484 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800485
486 # There's a lag between when `start servod` completes and when
487 # the _ServodConnectionVerifier trigger can actually succeed.
488 # The call to time.sleep() below gives time to make sure that
489 # the trigger won't fail after we return.
490
491 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
492 # But in the rare case all servo on a labstation are in heavy use they
493 # may take ~30 seconds. So the timeout value will double these value,
494 # and we'll try quick start up when first time initialize servohost,
495 # and use standard start up timeout in repair.
496 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700497 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800498 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700499 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800500 logging.debug('Wait %s seconds for servod process fully up.', timeout)
501 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700502 # Cache the initial instance timestamp to check against servod restarts
503 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800504
505
506 def stop_servod(self):
507 """Stop the servod process on servohost.
508 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800509 # Skip if running on the localhost.(crbug.com/1038168)
510 if self.is_localhost():
511 logging.debug("Servohost is a localhost, skipping stop servod.")
512 return
513
Garry Wangc1288cf2019-12-17 14:58:00 -0800514 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800515 self.run('stop servod PORT=%d' % self.servo_port,
516 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800517 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700518 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
519 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800520
521
522 def restart_servod(self, quick_startup=False):
523 """Restart the servod process on servohost.
524 """
525 self.stop_servod()
526 self.start_servod(quick_startup)
527
Garry Wang11b5e872020-03-11 15:14:08 -0700528
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800529 def _extract_compressed_logs(self, logdir, relevant_files):
530 """Decompress servod logs in |logdir|.
531
532 @param logdir: directory containing compressed servod logs.
533 @param relevant_files: list of files in |logdir| to consider.
534
535 @returns: tuple, (tarfiles, files) where
536 tarfiles: list of the compressed filenames that have been
537 extracted and deleted
538 files: list of the uncompressed files that were generated
539 """
540 # For all tar-files, first extract them to the directory, and
541 # then let the common flow handle them.
542 tarfiles = [cf for cf in relevant_files if
543 cf.endswith(self.COMPRESSION_SUFFIX)]
544 files = []
545 for f in tarfiles:
546 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
547 with tarfile.open(f) as tf:
548 # Each tarfile has only one member, as
549 # that's the compressed log.
550 member = tf.members[0]
551 # Manipulate so that it only extracts the basename, and not
552 # the directories etc.
553 member.name = norm_name
554 files.append(os.path.join(logdir, member.name))
555 tf.extract(member, logdir)
556 # File has been extracted: remove the compressed file.
557 os.remove(f)
558 return tarfiles, files
559
560 def _extract_mcu_logs(self, log_subdir):
561 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
562
563 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
564 lines from the logs to generate invidiual console logs e.g. after
565 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
566 those MCUs had any console input/output.
567
568 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
569 """
570 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
571 # files
572 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
573 if not os.path.exists(mcu_lines_file):
574 logging.info('No DEBUG logs found to extract MCU logs from.')
575 return
576 mcu_files = {}
577 mcu_file_template = '%s.txt'
578 with open(mcu_lines_file, 'r') as f:
579 for line in f:
580 match = self.MCU_EXTRACTOR.match(line)
581 if match:
582 mcu = match.group(self.MCU_GROUP).lower()
583 line = match.group(self.LINE_GROUP)
584 if mcu not in mcu_files:
585 mcu_file = os.path.join(log_subdir,
586 mcu_file_template % mcu)
587 mcu_files[mcu] = open(mcu_file, 'a')
588 fd = mcu_files[mcu]
589 fd.write(line + '\n')
590 for f in mcu_files:
591 mcu_files[f].close()
592
593
594 def remove_latest_log_symlinks(self):
595 """Remove the conveninence symlinks 'latest' servod logs."""
596 symlink_wildcard = '%s/latest*' % self.remote_log_dir
597 cmd = 'rm ' + symlink_wildcard
598 self.run(cmd, stderr_tee=None, ignore_status=True)
599
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700600 def probe_servod_restart(self, instance_ts, outdir):
601 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800602
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700603 If since the last time this host called start_servod() servod crashed
604 and restarted, this helper finds those logs as well, and stores them
605 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000606
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700607 It also issues a panicinfo command to servo devices after the restart
608 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800609
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700610 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800611 @param outdir: directory to create a subdirectory into to place the
612 servod logs into.
613 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700614 if self._initial_instance_ts is None:
615 logging.info('No log timestamp grabbed successfully on servod '
616 'startup. Cannot check device restarts. Ignoring.')
617 return
618 if instance_ts == self._initial_instance_ts:
619 logging.debug('Servod appears to have run without restarting')
620 return
621 # Servod seems to have restarted (at least once). |_initial_instance_ts|
622 # is the first timestamp, and instance_ts is the current timestamp. Find
623 # all timestamps in between them, and grab the logs for each.
624 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
625 instance_ts)
626 logging.info('Servod has restarted %d times between the start and the '
627 'end of this servo_host.', len(tss))
628 logging.info('This might be an issue. Will extract all logs from each '
629 'instance.')
630 logging.info('Logs that are not the currently running (about to turn '
631 'down) instance are maked with a .%s in their folder.',
632 self.OLD_LOG_SUFFIX)
633 for ts in tss:
634 self.get_instance_logs(ts, outdir, old=True)
635 # Lastly, servod has restarted due to a potential issue. Try to get
636 # panic information from servo micro and servo v4 for the current logs.
637 # This can only happen if the |_servo| attribute is initialized.
638 if self._servo:
639 for mcu in ['servo_micro', 'servo_v4']:
640 ctrl = '%s_uart_cmd' % mcu
641 if self._servo.has_control(ctrl):
642 logging.info('Trying to retrieve %r panicinfo into logs',
643 mcu)
644 try:
645 self._servo.set_nocheck(ctrl, 'panicinfo')
646 except error.TestFail as e:
647 logging.error('Failed to generate panicinfo for %r '
648 'logs. %s', mcu, str(e))
649
650 def _find_instance_timestamps_between(self, start_ts, end_ts):
651 """Find all log timestamps between [start_ts, end_ts).
652
653 @param start_ts: str, earliest log timestamp of interest
654 @param end_ts: str, latest log timestamp of interest
655
656 @returns: list, all timestamps between start_ts and end_ts, end_ts
657 exclusive, on the servo_host. An empty list on errors
658 """
659 # Simply get all timestamp, and then sort and remove
660 cmd = 'ls %s' % self.remote_log_dir
661 res = self.run(cmd, stderr_tee=None, ignore_status=True)
662 if res.exit_status != 0:
663 # Here we failed to find anything.
664 logging.info('Failed to find remote servod logs. Ignoring.')
665 return []
666 logfiles = res.stdout.strip().split()
667 timestamps = set()
668 for logfile in logfiles:
669 ts_match = self.TS_EXTRACTOR.match(logfile)
670 if not ts_match:
671 # Simply ignore files that fail the check. It might be the
672 # 'latest' symlinks or random files.
673 continue
674 timestamps.add(ts_match.group(self.TS_GROUP))
675 # At this point we have all unique timestamps.
676 timestamps = sorted(timestamps)
677 for ts in [start_ts, end_ts]:
678 if ts not in timestamps:
679 logging.error('Timestamp %r not in servod logs. Cannot query '
680 'for timestamps in between %r and %r', ts,
681 start_ts, end_ts)
682 return []
683 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
684
685 def get_instance_logs_ts(self):
686 """Retrieve the currently running servod instance's log timestamp
687
688 @returns: str, timestamp for current instance, or None on failure
689 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800690 # First, extract the timestamp. This cmd gives the real filename of
691 # the latest aka current log file.
692 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
693 'then realpath %(dir)s/latest.DEBUG;'
694 'elif [ -f %(dir)s/latest ];'
695 'then realpath %(dir)s/latest;'
696 'else exit %(code)d;'
697 'fi' % {'dir': self.remote_log_dir,
698 'code': self.NO_SYMLINKS_CODE})
699 res = self.run(cmd, stderr_tee=None, ignore_status=True)
700 if res.exit_status != 0:
701 if res.exit_status == self.NO_SYMLINKS_CODE:
702 logging.warning('servod log latest symlinks not found. '
703 'This is likely due to an error starting up '
704 'servod. Ignoring..')
705 else:
706 logging.warning('Failed to find servod logs on servo host.')
707 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700708 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800709 fname = os.path.basename(res.stdout.strip())
710 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800711 ts_match = self.TS_EXTRACTOR.match(fname)
712 if not ts_match:
713 logging.warning('Failed to extract timestamp from servod log file '
714 '%r. Skipping. The servo host is using outdated '
715 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700716 return None
717 return ts_match.group(self.TS_GROUP)
718
719 def get_instance_logs(self, instance_ts, outdir, old=False):
720 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
721
722 This method first collects all logs on the servo_host side pertaining
723 to this servod instance (port, instatiation). It glues them together
724 into combined log.[level].txt files and extracts all available MCU
725 console I/O from the logs into individual files e.g. servo_v4.txt
726
727 All the output can be found in a directory inside |outdir| that
728 this generates based on |LOG_DIR|, the servod port, and the instance
729 timestamp on the servo_host side.
730
731 @param instance_ts: log timestamp to grab logfiles for
732 @param outdir: directory to create a subdirectory into to place the
733 servod logs into.
734 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
735 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800736 # Create the local results log dir.
737 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
738 str(self.servo_port),
739 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700740 if old:
741 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
742 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800743 os.mkdir(log_dir)
744 # Now, get all files with that timestamp.
745 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
746 instance_ts)
747 res = self.run(cmd, stderr_tee=None, ignore_status=True)
748 files = res.stdout.strip().split()
749 try:
750 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700751 if not os.listdir(log_dir):
752 logging.info('No servod logs retrieved. Ignoring, and removing '
753 '%r again.', log_dir)
754 os.rmdir(log_dir)
755 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800756 except error.AutoservRunError as e:
757 result = e.result_obj
758 if result.exit_status != 0:
759 stderr = result.stderr.strip()
760 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
761 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700762 # Remove the log_dir as nothing was added to it.
763 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800764 return
765 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
766 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
767 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
768 # Create the joint files for each loglevel. i.e log.DEBUG
769 joint_file = self.JOINT_LOG_PREFIX
770 if level_name:
771 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
772 # This helps with some online tools to avoid complaints about an
773 # unknown filetype.
774 joint_file = joint_file + '.txt'
775 joint_path = os.path.join(log_dir, joint_file)
776 files = [f for f in local_files if level_name in f]
777 if not files:
778 # TODO(crrev.com/c/1793030): remove no-level case once CL
779 # is pushed
780 continue
781 # Extract compressed logs if any.
782 compressed, extracted = self._extract_compressed_logs(log_dir,
783 files)
784 files = list(set(files) - set(compressed))
785 files.extend(extracted)
786 # Need to sort. As they all share the same timestamp, and
787 # loglevel, the index itself is sufficient. The highest index
788 # is the oldest file, therefore we need a descending sort.
789 def sortkey(f, level=level_name):
790 """Custom sortkey to sort based on rotation number int."""
791 if f.endswith(level_name): return 0
792 return int(f.split('.')[-1])
793
794 files.sort(reverse=True, key=sortkey)
795 # Just rename the first file rather than building from scratch.
796 os.rename(files[0], joint_path)
797 with open(joint_path, 'a') as joint_f:
798 for logfile in files[1:]:
799 # Transfer the file to the joint file line by line.
800 with open(logfile, 'r') as log_f:
801 for line in log_f:
802 joint_f.write(line)
803 # File has been written over. Delete safely.
804 os.remove(logfile)
805 # Need to remove all files form |local_files| so we don't
806 # analyze them again.
807 local_files = list(set(local_files) - set(files) - set(compressed))
808 # Lastly, extract MCU logs from the joint logs.
809 self._extract_mcu_logs(log_dir)
810
Garry Wang79e9af62019-06-12 15:19:19 -0700811 def _lock(self):
812 """lock servohost by touching a file.
813 """
814 logging.debug('Locking servohost %s by touching %s file',
815 self.hostname, self._lock_file)
816 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700817 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700818
819
820 def _unlock(self):
821 """Unlock servohost by removing the lock file.
822 """
823 logging.debug('Unlocking servohost by removing %s file',
824 self._lock_file)
825 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700826 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700827
828
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700829 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -0700830 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700831 # NOTE: throughout this method there are multiple attempts to catch
832 # all errors. This is WAI as log grabbing should not fail tests.
833 # However, the goal is to catch and handle/process all errors, thus
834 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800835 if self._closed:
836 logging.debug('ServoHost is already closed.')
837 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700838 instance_ts = self.get_instance_logs_ts()
839 # TODO(crbug.com/1011516): once enabled, remove the check against
840 # localhost and instead check against log-rotiation enablement.
841 logs_available = (instance_ts is not None and
842 self.job and
843 not self.is_localhost())
844 if logs_available:
845 # Probe whether there was a servod restart, and grab those old
846 # logs as well.
847 try:
848 self.probe_servod_restart(instance_ts, self.job.resultdir)
849 except (error.AutoservRunError, error.TestFail) as e:
850 logging.info('Failed to grab servo logs due to: %s. '
851 'This error is forgiven.', str(e))
852 except Exception as e:
853 logging.error('Unexpected error probing for old logs. %s. '
854 'Forgiven. Please file a bug and fix or catch '
855 'in log probing function', str(e),
856 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700857 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800858 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -0700859 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800860 self._servo.close(outdir)
861
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700862 if logs_available:
863 # Grab current (not old like above) logs after the servo instance
864 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800865 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700866 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800867 except error.AutoservRunError as e:
868 logging.info('Failed to grab servo logs due to: %s. '
869 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700870 except Exception as e:
871 logging.error('Unexpected error grabbing servod logs. %s. '
872 'Forgiven. Please file a bug and fix or catch '
873 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700874
Garry Wang7c00b0f2019-06-25 17:28:17 -0700875 if self._is_locked:
876 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -0700877 try:
878 self._unlock()
879 except error.AutoservSSHTimeout:
880 logging.error('Unlock servohost failed due to ssh timeout.'
881 ' It may caused by servohost went down during'
882 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -0800883 # We want always stop servod after task to minimum the impact of bad
884 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -0800885 try:
886 self.stop_servod()
887 except error.AutoservRunError as e:
888 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800889 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -0800890
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700891 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800892 # Mark closed.
893 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700894
895
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800896 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -0700897 if self._servo_state is None:
Garry Wang11b5e872020-03-11 15:14:08 -0700898 return servo_constants.SERVO_STATE_UNKNOWN
Otabek Kasimova7ba91a2020-03-09 08:31:01 -0700899 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800900
901
Richard Barnetteea3e4602016-06-10 12:36:41 -0700902def make_servo_hostname(dut_hostname):
903 """Given a DUT's hostname, return the hostname of its servo.
904
905 @param dut_hostname: hostname of a DUT.
906
907 @return hostname of the DUT's servo.
908
909 """
910 host_parts = dut_hostname.split('.')
911 host_parts[0] = host_parts[0] + '-servo'
912 return '.'.join(host_parts)
913
914
915def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700916 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700917
918 @param servo_hostname: hostname of the servo host.
919
920 @return True if it's up, False otherwise
921 """
922 # Technically, this duplicates the SSH ping done early in the servo
923 # proxy initialization code. However, this ping ends in a couple
924 # seconds when if fails, rather than the 60 seconds it takes to decide
925 # that an SSH ping has timed out. Specifically, that timeout happens
926 # when our servo DNS name resolves, but there is no host at that IP.
927 logging.info('Pinging servo host at %s', servo_hostname)
928 ping_config = ping_runner.PingConfig(
929 servo_hostname, count=3,
930 ignore_result=True, ignore_status=True)
931 return ping_runner.PingRunner().ping(ping_config).received > 0
932
933
Richard Barnettee519dcd2016-08-15 17:37:17 -0700934def _map_afe_board_to_servo_board(afe_board):
935 """Map a board we get from the AFE to a servo appropriate value.
936
937 Many boards are identical to other boards for servo's purposes.
938 This function makes that mapping.
939
940 @param afe_board string board name received from AFE.
941 @return board we expect servo to have.
942
943 """
944 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
945 BOARD_MAP = {'gizmo': 'panther'}
946 mapped_board = afe_board
947 if afe_board in BOARD_MAP:
948 mapped_board = BOARD_MAP[afe_board]
949 else:
950 for suffix in KNOWN_SUFFIXES:
951 if afe_board.endswith(suffix):
952 mapped_board = afe_board[0:-len(suffix)]
953 break
954 if mapped_board != afe_board:
955 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
956 return mapped_board
957
958
Prathmesh Prabhub4810232018-09-07 13:24:08 -0700959def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700960 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700961
Richard Barnetteea3e4602016-06-10 12:36:41 -0700962 @param dut_host Instance of `Host` on which to find the servo
963 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -0700964 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700965 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700966 info = dut_host.host_info_store.get()
967 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -0700968 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -0700969
Garry Wang11b5e872020-03-11 15:14:08 -0700970 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700971 try:
Garry Wang11b5e872020-03-11 15:14:08 -0700972 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
973 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700974 except ValueError:
975 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -0700976 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700977 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -0700978 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700979
980 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -0700981 servo_board = _map_afe_board_to_servo_board(info.board)
982 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700983 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -0700984 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
985 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -0700986
987
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700988def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -0700989 if (servo_args[servo_constants.SERVO_HOST_ATTR]
990 in ['localhost', '127.0.0.1']):
991 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700992 'SSP', 'host_container_ip', type=str, default=None)
993
994
Dan Shi023aae32016-05-25 11:13:01 -0700995def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -0800996 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700997 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -0800998
Richard Barnette9a26ad62016-06-10 12:03:08 -0700999 This function attempts to create and verify or repair a `ServoHost`
1000 object for a servo connected to the given `dut`, subject to various
1001 constraints imposed by the parameters:
1002 * When the `servo_args` parameter is not `None`, a servo
1003 host must be created, and must be checked with `repair()`.
1004 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1005 true:
1006 * If `try_servo_repair` is true, then create a servo host and
1007 check it with `repair()`.
1008 * Otherwise, if the servo responds to `ping` then create a
1009 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001010
Richard Barnette9a26ad62016-06-10 12:03:08 -07001011 In cases where `servo_args` was not `None`, repair failure
1012 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001013 are logged and then discarded. Note that this only happens in cases
1014 where we're called from a test (not special task) control file that
1015 has an explicit dependency on servo. In that case, we require that
1016 repair not write to `status.log`, so as to avoid polluting test
1017 results.
1018
1019 TODO(jrbarnette): The special handling for servo in test control
1020 files is a thorn in my flesh; I dearly hope to see it cut out before
1021 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001022
1023 Parameters for a servo host consist of a host name, port number, and
1024 DUT board, and are determined from one of these sources, in order of
1025 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001026 * Servo attributes from the `dut` parameter take precedence over
1027 all other sources of information.
1028 * If a DNS entry for the servo based on the DUT hostname exists in
1029 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001030 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001031 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001032 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001033
1034 @param dut An instance of `Host` from which to take
1035 servo parameters (if available).
1036 @param servo_args A dictionary with servo parameters to use if
1037 they can't be found from `dut`. If this
1038 argument is supplied, unrepaired exceptions
1039 from `verify()` will be passed back to the
1040 caller.
1041 @param try_lab_servo If not true, servo host creation will be
1042 skipped unless otherwise required by the
1043 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001044 @param try_servo_repair If true, check a servo host with
1045 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -08001046
1047 @returns: A ServoHost object or None. See comments above.
1048
1049 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001050 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001051 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001052 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001053 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001054 if utils.in_moblab_ssp():
1055 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001056 logging.debug(
1057 'Overriding provided servo_args (%s) with arguments'
1058 ' determined from the host (%s)',
1059 servo_args,
1060 servo_args_override,
1061 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001062 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001063
Richard Barnetteea3e4602016-06-10 12:36:41 -07001064 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001065 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001066 if try_lab_servo or servo_dependency:
Garry Wang11b5e872020-03-11 15:14:08 -07001067 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001068 else:
1069 # For regular test case which not required the servo
1070 return None, None
1071
Garry Wang11b5e872020-03-11 15:14:08 -07001072 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1073 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001074 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1075 logging.debug(
1076 'Servo connection info missed hostname: %s , port: %s',
1077 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001078 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001079 if not is_servo_host_information_valid(servo_hostname, servo_port):
1080 logging.debug(
1081 'Servo connection info is incorrect hostname: %s , port: %s',
1082 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001083 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001084 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001085 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001086 logging.debug('ServoHost is not up.')
Garry Wang11b5e872020-03-11 15:14:08 -07001087 return None, servo_constants.SERVO_STATE_BROKEN
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001088
Garry Wangebc015b2019-06-06 17:45:06 -07001089 newhost = ServoHost(**servo_args)
Garry Wangcdd27b22020-01-13 14:59:11 -08001090 try:
1091 newhost.restart_servod(quick_startup=True)
1092 except error.AutoservSSHTimeout:
1093 logging.warning("Restart servod failed due ssh connection "
1094 "to servohost timed out. This error is forgiven"
1095 " here, we will retry in servo repair process.")
1096 except error.AutoservRunError as e:
1097 logging.warning("Restart servod failed due to:\n%s\n"
1098 "This error is forgiven here, we will retry"
1099 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001100
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001101 # TODO(gregorynisbet): Clean all of this up.
1102 logging.debug('create_servo_host: attempt to set info store on '
1103 'servo host')
1104 try:
1105 if dut_host_info is None:
1106 logging.debug('create_servo_host: dut_host_info is '
1107 'None, skipping')
1108 else:
1109 newhost.set_dut_host_info(dut_host_info)
1110 logging.debug('create_servo_host: successfully set info '
1111 'store')
1112 except Exception:
1113 logging.error("create_servo_host: (%s)", traceback.format_exc())
1114
Richard Barnette9a26ad62016-06-10 12:03:08 -07001115 # Note that the logic of repair() includes everything done
1116 # by verify(). It's sufficient to call one or the other;
1117 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001118 if servo_dependency:
1119 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001120 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001121
1122 if try_servo_repair:
1123 try:
1124 newhost.repair()
1125 except Exception:
1126 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001127 else:
1128 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001129 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001130 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001131 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001132 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001133
1134
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001135def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001136 if hostname is None or len(hostname.strip()) == 0:
1137 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001138 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001139 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001140 if not type(port) is int:
1141 try:
1142 int(port)
1143 except ValueError:
1144 return False
1145
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001146 return True
1147
1148
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001149def is_servo_host_information_valid(hostname, port):
1150 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001151 return False
1152 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001153 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001154 if port_int < 1 or port_int > 65000:
1155 return False
1156 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001157 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001158 return False
1159 return True