blob: b131883340768718666025fe287505fc404e0e48 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Fang Deng5d518f42013-08-02 14:04:32 -070019
20from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070021from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070022from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070023from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070026from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000027from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070028from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070029from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070030from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070031from autotest_lib.client.common_lib import global_config
Garry Wangd7367482020-02-27 13:52:40 -080032
Dan Shi3b2adf62015-09-02 17:46:54 -070033_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070034
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070035
Garry Wangebc015b2019-06-06 17:45:06 -070036class ServoHost(base_servohost.BaseServoHost):
37 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070038 that with a servo instance for a specific port.
39
40 @type _servo: servo.Servo | None
41 """
Fang Deng5d518f42013-08-02 14:04:32 -070042
Raul E Rangel52ca2e82018-07-03 14:10:14 -060043 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070044
Dan Shie5b3c512014-08-21 12:12:09 -070045 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070046 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070047
xixuan6cf6d2f2016-01-29 15:29:00 -080048 # Ready test function
49 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070050
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080051 # Directory prefix on the servo host where the servod logs are stored.
52 SERVOD_LOG_PREFIX = '/var/log/servod'
53
54 # Exit code to use when symlinks for servod logs are not found.
55 NO_SYMLINKS_CODE = 9
56
57 # Directory in the job's results directory to dump the logs into.
58 LOG_DIR = 'servod'
59
60 # Prefix for joint loglevel files in the logs.
61 JOINT_LOG_PREFIX = 'log'
62
63 # Regex group to extract timestamp from logfile name.
64 TS_GROUP = 'ts'
65
66 # This regex is used to extract the timestamp from servod logs.
67 # files always start with log.
68 TS_RE = (r'log.'
69 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
70 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
71 # The loglevel is optional depending on labstation version.
72 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
73 TS_EXTRACTOR = re.compile(TS_RE)
74
75 # Regex group to extract MCU name from logline in servod logs.
76 MCU_GROUP = 'mcu'
77
78 # Regex group to extract logline from MCU logline in servod logs.
79 LINE_GROUP = 'line'
80
81 # This regex is used to extract the mcu and the line content from an
82 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
83 # Here is an example log-line:
84 #
85 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
86 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
87 #
88 # Here is conceptually how they are formatted:
89 #
90 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
91 # <output>
92 #
93 # The log format starts with a timestamp
94 MCU_RE = (r'[\d\-]+ [\d:,]+ '
95 # The mcu that is logging this is next.
96 r'- (?P<%s>\w+) - '
97 # Next, we have more log outputs before the actual line.
98 # Information about the file line, logging function etc.
99 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
100 # NOTE: if the log format changes, this regex needs to be
101 # adjusted.
102 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
103 # Lastly, we get the MCU's console line.
104 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
105 MCU_EXTRACTOR = re.compile(MCU_RE)
106
107 # Suffix to identify compressed logfiles.
108 COMPRESSION_SUFFIX = '.tbz2'
109
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700110 # A suffix to mark servod log directories that came from instance that
111 # ran during this servo_host, but are not the last one running e.g. when
112 # an instance (on purpose, or due to a bug) restarted in the middle of the
113 # run.
114 OLD_LOG_SUFFIX = 'old'
115
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800116 def _init_attributes(self):
117 self._servo_state = None
118 self.servo_port = None
119 self.servo_board = None
120 self.servo_model = None
121 self.servo_serial = None
122 self._servo = None
123 self._servod_server_proxy = None
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700124 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800125 # Flag to make sure that multiple calls to close do not result in the
126 # logic executing multiple times.
127 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700128
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700129 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700130 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700131 servo_model=None, servo_serial=None, is_in_lab=None,
132 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700133 """Initialize a ServoHost instance.
134
135 A ServoHost instance represents a host that controls a servo.
136
137 @param servo_host: Name of the host where the servod process
138 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600139 @param servo_port: Port the servod process is listening on. Defaults
140 to the SERVOD_PORT environment variable if set,
141 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700142 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700143 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800144 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
145 to None, for which utils.host_is_in_lab_zone will be
146 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700147
148 """
149 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700150 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800151 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700152 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700153 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700154 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700155 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800156
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800157 # The location of the log files on the servo host for this instance.
158 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
159 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700160 # Path of the servo host lock file.
161 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
162 + self.LOCK_FILE_POSTFIX)
163 # File path to declare a reboot request.
164 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
165 + self.REBOOT_FILE_POSTFIX)
166
167 # Lock the servo host if it's an in-lab labstation to prevent other
168 # task to reboot it until current task completes. We also wait and
169 # make sure the labstation is up here, in the case of the labstation is
170 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700171 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700172 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
173 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700174 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700175
Richard Barnette9a26ad62016-06-10 12:03:08 -0700176 self._repair_strategy = (
177 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700178
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700179 def __str__(self):
180 return "<%s '%s:%s'>" % (
181 type(self).__name__, self.hostname, self.servo_port)
182
Richard Barnette9a26ad62016-06-10 12:03:08 -0700183 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700184 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700185
186 Initializes `self._servo` and then verifies that all network
187 connections are working. This will create an ssh tunnel if
188 it's required.
189
190 As a side effect of testing the connection, all signals on the
191 target servo are reset to default values, and the USB stick is
192 set to the neutral (off) position.
193 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700194 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Kuang-che Wu05763f52019-08-30 16:48:21 +0800195 self._servo = servo_obj
Richard Barnette9a26ad62016-06-10 12:03:08 -0700196 timeout, _ = retry.timeout(
197 servo_obj.initialize_dut,
198 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
199 if timeout:
200 raise hosts.AutoservVerifyError(
201 'Servo initialize timed out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700202
203
204 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700205 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700206
207 If we've previously successfully connected to our servo,
208 disconnect any established ssh tunnel, and set `self._servo`
209 back to `None`.
210 """
211 if self._servo:
212 # N.B. This call is safe even without a tunnel:
213 # rpc_server_tracker.disconnect() silently ignores
214 # unknown ports.
215 self.rpc_server_tracker.disconnect(self.servo_port)
216 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700217
Garry Wangc1288cf2019-12-17 14:58:00 -0800218
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800219 def _create_servod_server_proxy(self):
220 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700221
222 @returns: An xmlrpclib.ServerProxy that is connected to the servod
223 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700224 """
Garry Wang11b5e872020-03-11 15:14:08 -0700225 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
226 and not self.is_localhost()):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700227 return self.rpc_server_tracker.xmlrpc_connect(
228 None, self.servo_port,
229 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800230 timeout_seconds=60,
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700231 request_timeout_seconds=3600,
232 server_desc=str(self))
Richard Barnette9a26ad62016-06-10 12:03:08 -0700233 else:
234 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
235 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700236
237
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800238 def get_servod_server_proxy(self):
239 """Return a cached proxy if exists; otherwise, create a new one.
240
241 @returns: An xmlrpclib.ServerProxy that is connected to the servod
242 server on the host.
243 """
244 # Single-threaded execution, no race
245 if self._servod_server_proxy is None:
246 self._servod_server_proxy = self._create_servod_server_proxy()
247 return self._servod_server_proxy
248
249
Richard Barnette1edbb162016-11-01 11:47:50 -0700250 def verify(self, silent=False):
251 """Update the servo host and verify it's in a good state.
252
253 @param silent If true, suppress logging in `status.log`.
254 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700255 message = 'Beginning verify for servo host %s port %s serial %s'
256 message %= (self.hostname, self.servo_port, self.servo_serial)
257 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700258 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700259 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700260 self._servo_state = servo_constants.SERVO_STATE_WORKING
261 self.record('INFO', None, None,
262 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700263 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700264 self._servo_state = servo_constants.SERVO_STATE_BROKEN
265 self.record('INFO', None, None,
266 'ServoHost verify set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700267 if self._is_critical_error(e):
268 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700269
270
Garry Wang7b0e1b72020-03-25 19:08:59 -0700271 def get_image_name_from_usbkey(self, usbkey_dev):
272 """Mount usb drive and check ChromeOS image name on it if there is
273 one. This method assumes the image_usbkey_direction is already set
274 to servo side.
275
276 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
277
278 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
279 or empty string if no test image detected, or unexpected
280 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700281 """
Garry Wang70e5d062020-04-03 18:01:05 -0700282 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700283 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
284 unmount_cmd = 'umount %s' % usb_mount_path
285 # ChromeOS root fs is in /dev/sdx3
286 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
287 # Unmount if there is an existing stale mount.
288 self.run(unmount_cmd, ignore_status=True)
289 # Create if the mount point is not existing.
290 self.run('mkdir -p %s' % usb_mount_path)
291 try:
292 # Attempt to mount the usb drive.
293 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700294 if mount_result.exit_status != 0:
295 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700296 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700297
298 release_content = self.run(
299 'cat %s/etc/lsb-release' % usb_mount_path,
300 ignore_status=True).stdout.strip()
301
302 if not re.search(r'RELEASE_TRACK=.*test', release_content):
303 logging.info('The image on usbkey is not a test image')
304 return ''
305
306 return lsbrelease_utils.get_chromeos_release_builder_path(
307 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700308 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700309 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700310 self.run(unmount_cmd, ignore_status=True)
311
312
Garry Wang70e5d062020-04-03 18:01:05 -0700313 def _probe_and_validate_usb_dev(self):
314 """This method probe the usb dev path by talking to servo, and then
315 validate the dev path is valid block device to servohost.
316 Possible output:
317 1. Encounter error during probe usb dev, returns empty string.
318 2. probe usb dev completed without error but cannot find usb dev,
319 raise AutoservRepairError.
320 3. probe usb dev find a usb dev path, but failed validation in this
321 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700322
Garry Wang70e5d062020-04-03 18:01:05 -0700323 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
324 if unexpected error occurred during probe.
325 @raises: AutoservRepairError if servo couldn't probe the usb dev path
326 (servo.probe_host_usb_dev() returns empty string), or the dev path is
327 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700328 """
329 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700330 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700331 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700332 except Exception as e:
333 # We don't want any unexpected or transient servo communicating
334 # failure block usb repair, so capture all errors here.
335 logging.error(e, exc_info=True)
336 logging.error('Unexpected error occurred on get usbkey dev path,'
337 ' skipping usbkey validation.')
338 return ''
339
Garry Wang70e5d062020-04-03 18:01:05 -0700340 if usb_dev:
341 # probe_host_usb_dev() sometimes return stale record,
342 # so we need to make sure the path exists in fdisk.
343 validate_cmd = 'fdisk -l | grep %s' % usb_dev
344 resp = self.run(validate_cmd, ignore_status=True)
345 if resp.exit_status == 0:
346 return usb_dev
347
348 logging.error('%s is reported from "image_usbkey_dev" control but'
349 ' not detected from fdisk!', usb_dev)
350
351 raise hosts.AutoservRepairError(
352 'No usbkey detected on servo, the usbkey may be either missing'
353 ' or broken. Please replace usbkey on the servo and retry.',
354 'missing usbkey')
355
356
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700357 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700358 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700359 if self.servo_board:
360 try:
361 frm_config = config.Config(self.servo_board, self.servo_model)
362 return frm_config.chrome_ec
363 except Exception as e:
364 logging.error('Unexpected error when read from firmware'
365 ' configs; %s', str(e))
366 return False
367
Garry Wang70e5d062020-04-03 18:01:05 -0700368 def validate_image_usbkey(self):
369 """This method first validate if there is a recover usbkey on servo
370 that accessible to servohost, and second check if a ChromeOS image is
371 already on the usb drive and return the image_name so we can avoid
372 unnecessary download and flash to the recover usbkey on servo.
373
374 Please note that, there is special error handling logic here:
375 1. If unexpected error happens, we return empty string. So repair
376 actions will not get blocked.
377 2. If no working usbkey present on servo, but no errors, we'll raise
378 AutoservRepairError here.
379
380 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
381 or empty string if no test image detected, or unexpected
382 error occurred.
383 @raises: AutoservRepairError if the usbkey is not detected on servo.
384 """
385 usb_dev = self._probe_and_validate_usb_dev()
386 if usb_dev:
387 return self.get_image_name_from_usbkey(usb_dev)
388 else:
389 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700390
391
Richard Barnette1edbb162016-11-01 11:47:50 -0700392 def repair(self, silent=False):
393 """Attempt to repair servo host.
394
395 @param silent If true, suppress logging in `status.log`.
396 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700397 message = 'Beginning repair for servo host %s port %s serial %s'
398 message %= (self.hostname, self.servo_port, self.servo_serial)
399 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700400 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700401 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700402 self._servo_state = servo_constants.SERVO_STATE_WORKING
403 self.record('INFO', None, None,
404 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700405 # If target is a labstation then try to withdraw any existing
406 # reboot request created by this servo because it passed repair.
407 if self.is_labstation():
408 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700409 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700410 self._servo_state = servo_constants.SERVO_STATE_BROKEN
411 self.record('INFO', None, None,
412 'ServoHost repair set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700413 if self._is_critical_error(e):
414 self.disconnect_servo()
415 self.stop_servod()
416 raise
417
418
419 def _is_critical_error(self, error):
420 if (isinstance(error, hosts.AutoservVerifyDependencyError)
421 and not error.is_critical()):
422 logging.warning('Non-critical verify failure(s) detected during'
423 ' verify/repair servo, servo connection will'
424 ' still up but may not fully functional.'
425 ' Some repair actions and servo depended'
426 ' tests may not run.')
427 return False
428 logging.info('Critical verify failure(s) detected during repair/verify'
429 ' servo. Disconnecting servo and stop servod, all repair '
430 'action and tests that depends on servo will not run.')
431 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700432
433
Dan Shi4d478522014-02-14 13:46:32 -0800434 def get_servo(self):
435 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700436
Dan Shi4d478522014-02-14 13:46:32 -0800437 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700438 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700439 """
Dan Shi4d478522014-02-14 13:46:32 -0800440 return self._servo
441
442
Garry Wang79e9af62019-06-12 15:19:19 -0700443 def request_reboot(self):
444 """Request servohost to be rebooted when it's safe to by touch a file.
445 """
446 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700447 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700448 self.run('touch %s' % self._reboot_file, ignore_status=True)
449
450
Garry Wang464ff1e2019-07-18 17:20:34 -0700451 def withdraw_reboot_request(self):
452 """Withdraw a servohost reboot request if exists by remove the flag
453 file.
454 """
455 logging.debug('Withdrawing request to reboot servohost %s that created'
456 ' by servo with port # %s if exists.',
457 self.hostname, self.servo_port)
458 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
459
460
Garry Wangc1288cf2019-12-17 14:58:00 -0800461 def start_servod(self, quick_startup=False):
462 """Start the servod process on servohost.
463 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800464 # Skip if running on the localhost.(crbug.com/1038168)
465 if self.is_localhost():
466 logging.debug("Servohost is a localhost, skipping start servod.")
467 return
468
469 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800470 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800471 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800472 if self.servo_model:
473 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800474 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800475 logging.warning('Board for DUT is unknown; starting servod'
476 ' assuming a pre-configured board.')
477
478 cmd += ' PORT=%d' % self.servo_port
479 if self.servo_serial:
480 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800481
482 # Start servod with dual_v4 if the DUT/servo from designated pools.
483 dut_host_info = self.get_dut_host_info()
484 if dut_host_info:
Garry Wang11b5e872020-03-11 15:14:08 -0700485 if bool(dut_host_info.pools &
486 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800487 logging.debug('The DUT is detected in following designated'
488 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700489 servo_constants.POOLS_SUPPORT_DUAL_V4)
Garry Wangd7367482020-02-27 13:52:40 -0800490 cmd += ' DUAL_V4=1'
491
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800492 # Remove the symbolic links from the logs. This helps ensure that
493 # a failed servod instantiation does not cause us to grab old logs
494 # by mistake.
495 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800496 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800497
498 # There's a lag between when `start servod` completes and when
499 # the _ServodConnectionVerifier trigger can actually succeed.
500 # The call to time.sleep() below gives time to make sure that
501 # the trigger won't fail after we return.
502
503 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
504 # But in the rare case all servo on a labstation are in heavy use they
505 # may take ~30 seconds. So the timeout value will double these value,
506 # and we'll try quick start up when first time initialize servohost,
507 # and use standard start up timeout in repair.
508 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700509 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800510 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700511 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800512 logging.debug('Wait %s seconds for servod process fully up.', timeout)
513 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700514 # Cache the initial instance timestamp to check against servod restarts
515 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800516
517
518 def stop_servod(self):
519 """Stop the servod process on servohost.
520 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800521 # Skip if running on the localhost.(crbug.com/1038168)
522 if self.is_localhost():
523 logging.debug("Servohost is a localhost, skipping stop servod.")
524 return
525
Garry Wangc1288cf2019-12-17 14:58:00 -0800526 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800527 self.run('stop servod PORT=%d' % self.servo_port,
528 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800529 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700530 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
531 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800532
533
534 def restart_servod(self, quick_startup=False):
535 """Restart the servod process on servohost.
536 """
537 self.stop_servod()
538 self.start_servod(quick_startup)
539
Garry Wang11b5e872020-03-11 15:14:08 -0700540
Garry Wangffbd2162020-04-17 16:13:48 -0700541 def _process_servodtool_error(self, response):
542 """Helper function to handle non-zero servodtool response.
543 """
544 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
545 response.stderr):
546 logging.error('The servo is not plugged on a usb hub that supports'
547 ' power-cycle!')
548 return
549
550 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
551 self.servo_serial, response.stdout):
552 logging.error('No servo with serial %s found!', self.servo_serial)
553 return
554
555 logging.error('Unexpected error occurred from usbhub control, please'
556 ' file a bug and inform chrome-fleet-software@ team!')
557
558
559 def _get_servo_usb_devnum(self):
560 """Helper function to collect current usb devnum of servo.
561 """
562 resp = self.run('servodtool device -s %s usb-path' % self.servo_serial,
563 ignore_status=True)
564 if resp.exit_status != 0:
565 self._process_servodtool_error(resp)
566 return ''
567 usb_path = resp.stdout.strip()
568 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
569
570 resp = self.run('cat %s/devnum' % usb_path,
571 ignore_status=True)
572 if resp.exit_status != 0:
573 self._process_servodtool_error(resp)
574 return ''
575 return resp.stdout.strip()
576
577
578 def _reset_servo(self):
579 logging.info('Resetting servo through smart usbhub.')
580 resp = self.run('servodtool device -s %s power-cycle' %
581 self.servo_serial, ignore_status=True)
582 if resp.exit_status != 0:
583 self._process_servodtool_error(resp)
584 return False
585
586 logging.debug('Wait %s seconds for servo to come back from reset.',
587 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
588 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
589 return True
590
591
592 def reset_servo(self):
593 """Reset(power-cycle) the servo via smart usbhub.
594 """
595 if not self.is_labstation():
596 logging.info('Servo reset is not applicable to servo_v3.')
597 return
598
599 pre_reset_devnum = self._get_servo_usb_devnum()
600 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
601 result = self._reset_servo()
602 if not result:
603 message = ('Failed to reset servo with serial: %s'
604 % self.servo_serial)
605 logging.warning(message)
606 self.record('INFO', None, None, message)
607 return
608
609 post_reset_devnum = self._get_servo_usb_devnum()
610 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
611 if not (pre_reset_devnum and post_reset_devnum):
612 message = ('Servo reset completed but unable to verify'
613 ' devnum change!')
614 elif pre_reset_devnum != post_reset_devnum:
615 message = ('Reset servo with serial %s completed successfully!'
616 % self.servo_serial)
617 else:
618 message = 'Servo reset completed but devnum is still not changed!'
619 logging.info(message)
620 self.record('INFO', None, None, message)
621
622
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800623 def _extract_compressed_logs(self, logdir, relevant_files):
624 """Decompress servod logs in |logdir|.
625
626 @param logdir: directory containing compressed servod logs.
627 @param relevant_files: list of files in |logdir| to consider.
628
629 @returns: tuple, (tarfiles, files) where
630 tarfiles: list of the compressed filenames that have been
631 extracted and deleted
632 files: list of the uncompressed files that were generated
633 """
634 # For all tar-files, first extract them to the directory, and
635 # then let the common flow handle them.
636 tarfiles = [cf for cf in relevant_files if
637 cf.endswith(self.COMPRESSION_SUFFIX)]
638 files = []
639 for f in tarfiles:
640 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
641 with tarfile.open(f) as tf:
642 # Each tarfile has only one member, as
643 # that's the compressed log.
644 member = tf.members[0]
645 # Manipulate so that it only extracts the basename, and not
646 # the directories etc.
647 member.name = norm_name
648 files.append(os.path.join(logdir, member.name))
649 tf.extract(member, logdir)
650 # File has been extracted: remove the compressed file.
651 os.remove(f)
652 return tarfiles, files
653
654 def _extract_mcu_logs(self, log_subdir):
655 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
656
657 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
658 lines from the logs to generate invidiual console logs e.g. after
659 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
660 those MCUs had any console input/output.
661
662 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
663 """
664 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
665 # files
666 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
667 if not os.path.exists(mcu_lines_file):
668 logging.info('No DEBUG logs found to extract MCU logs from.')
669 return
670 mcu_files = {}
671 mcu_file_template = '%s.txt'
672 with open(mcu_lines_file, 'r') as f:
673 for line in f:
674 match = self.MCU_EXTRACTOR.match(line)
675 if match:
676 mcu = match.group(self.MCU_GROUP).lower()
677 line = match.group(self.LINE_GROUP)
678 if mcu not in mcu_files:
679 mcu_file = os.path.join(log_subdir,
680 mcu_file_template % mcu)
681 mcu_files[mcu] = open(mcu_file, 'a')
682 fd = mcu_files[mcu]
683 fd.write(line + '\n')
684 for f in mcu_files:
685 mcu_files[f].close()
686
687
688 def remove_latest_log_symlinks(self):
689 """Remove the conveninence symlinks 'latest' servod logs."""
690 symlink_wildcard = '%s/latest*' % self.remote_log_dir
691 cmd = 'rm ' + symlink_wildcard
692 self.run(cmd, stderr_tee=None, ignore_status=True)
693
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700694 def probe_servod_restart(self, instance_ts, outdir):
695 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800696
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700697 If since the last time this host called start_servod() servod crashed
698 and restarted, this helper finds those logs as well, and stores them
699 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000700
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700701 It also issues a panicinfo command to servo devices after the restart
702 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800703
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700704 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800705 @param outdir: directory to create a subdirectory into to place the
706 servod logs into.
707 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700708 if self._initial_instance_ts is None:
709 logging.info('No log timestamp grabbed successfully on servod '
710 'startup. Cannot check device restarts. Ignoring.')
711 return
712 if instance_ts == self._initial_instance_ts:
713 logging.debug('Servod appears to have run without restarting')
714 return
715 # Servod seems to have restarted (at least once). |_initial_instance_ts|
716 # is the first timestamp, and instance_ts is the current timestamp. Find
717 # all timestamps in between them, and grab the logs for each.
718 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
719 instance_ts)
720 logging.info('Servod has restarted %d times between the start and the '
721 'end of this servo_host.', len(tss))
722 logging.info('This might be an issue. Will extract all logs from each '
723 'instance.')
724 logging.info('Logs that are not the currently running (about to turn '
725 'down) instance are maked with a .%s in their folder.',
726 self.OLD_LOG_SUFFIX)
727 for ts in tss:
728 self.get_instance_logs(ts, outdir, old=True)
729 # Lastly, servod has restarted due to a potential issue. Try to get
730 # panic information from servo micro and servo v4 for the current logs.
731 # This can only happen if the |_servo| attribute is initialized.
732 if self._servo:
733 for mcu in ['servo_micro', 'servo_v4']:
734 ctrl = '%s_uart_cmd' % mcu
735 if self._servo.has_control(ctrl):
736 logging.info('Trying to retrieve %r panicinfo into logs',
737 mcu)
738 try:
739 self._servo.set_nocheck(ctrl, 'panicinfo')
740 except error.TestFail as e:
741 logging.error('Failed to generate panicinfo for %r '
742 'logs. %s', mcu, str(e))
743
744 def _find_instance_timestamps_between(self, start_ts, end_ts):
745 """Find all log timestamps between [start_ts, end_ts).
746
747 @param start_ts: str, earliest log timestamp of interest
748 @param end_ts: str, latest log timestamp of interest
749
750 @returns: list, all timestamps between start_ts and end_ts, end_ts
751 exclusive, on the servo_host. An empty list on errors
752 """
753 # Simply get all timestamp, and then sort and remove
754 cmd = 'ls %s' % self.remote_log_dir
755 res = self.run(cmd, stderr_tee=None, ignore_status=True)
756 if res.exit_status != 0:
757 # Here we failed to find anything.
758 logging.info('Failed to find remote servod logs. Ignoring.')
759 return []
760 logfiles = res.stdout.strip().split()
761 timestamps = set()
762 for logfile in logfiles:
763 ts_match = self.TS_EXTRACTOR.match(logfile)
764 if not ts_match:
765 # Simply ignore files that fail the check. It might be the
766 # 'latest' symlinks or random files.
767 continue
768 timestamps.add(ts_match.group(self.TS_GROUP))
769 # At this point we have all unique timestamps.
770 timestamps = sorted(timestamps)
771 for ts in [start_ts, end_ts]:
772 if ts not in timestamps:
773 logging.error('Timestamp %r not in servod logs. Cannot query '
774 'for timestamps in between %r and %r', ts,
775 start_ts, end_ts)
776 return []
777 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
778
779 def get_instance_logs_ts(self):
780 """Retrieve the currently running servod instance's log timestamp
781
782 @returns: str, timestamp for current instance, or None on failure
783 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800784 # First, extract the timestamp. This cmd gives the real filename of
785 # the latest aka current log file.
786 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
787 'then realpath %(dir)s/latest.DEBUG;'
788 'elif [ -f %(dir)s/latest ];'
789 'then realpath %(dir)s/latest;'
790 'else exit %(code)d;'
791 'fi' % {'dir': self.remote_log_dir,
792 'code': self.NO_SYMLINKS_CODE})
793 res = self.run(cmd, stderr_tee=None, ignore_status=True)
794 if res.exit_status != 0:
795 if res.exit_status == self.NO_SYMLINKS_CODE:
796 logging.warning('servod log latest symlinks not found. '
797 'This is likely due to an error starting up '
798 'servod. Ignoring..')
799 else:
800 logging.warning('Failed to find servod logs on servo host.')
801 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700802 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800803 fname = os.path.basename(res.stdout.strip())
804 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800805 ts_match = self.TS_EXTRACTOR.match(fname)
806 if not ts_match:
807 logging.warning('Failed to extract timestamp from servod log file '
808 '%r. Skipping. The servo host is using outdated '
809 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700810 return None
811 return ts_match.group(self.TS_GROUP)
812
813 def get_instance_logs(self, instance_ts, outdir, old=False):
814 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
815
816 This method first collects all logs on the servo_host side pertaining
817 to this servod instance (port, instatiation). It glues them together
818 into combined log.[level].txt files and extracts all available MCU
819 console I/O from the logs into individual files e.g. servo_v4.txt
820
821 All the output can be found in a directory inside |outdir| that
822 this generates based on |LOG_DIR|, the servod port, and the instance
823 timestamp on the servo_host side.
824
825 @param instance_ts: log timestamp to grab logfiles for
826 @param outdir: directory to create a subdirectory into to place the
827 servod logs into.
828 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
829 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800830 # Create the local results log dir.
831 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
832 str(self.servo_port),
833 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700834 if old:
835 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
836 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800837 os.mkdir(log_dir)
838 # Now, get all files with that timestamp.
839 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
840 instance_ts)
841 res = self.run(cmd, stderr_tee=None, ignore_status=True)
842 files = res.stdout.strip().split()
843 try:
844 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700845 if not os.listdir(log_dir):
846 logging.info('No servod logs retrieved. Ignoring, and removing '
847 '%r again.', log_dir)
848 os.rmdir(log_dir)
849 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800850 except error.AutoservRunError as e:
851 result = e.result_obj
852 if result.exit_status != 0:
853 stderr = result.stderr.strip()
854 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
855 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700856 # Remove the log_dir as nothing was added to it.
857 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800858 return
859 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
860 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
861 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
862 # Create the joint files for each loglevel. i.e log.DEBUG
863 joint_file = self.JOINT_LOG_PREFIX
864 if level_name:
865 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
866 # This helps with some online tools to avoid complaints about an
867 # unknown filetype.
868 joint_file = joint_file + '.txt'
869 joint_path = os.path.join(log_dir, joint_file)
870 files = [f for f in local_files if level_name in f]
871 if not files:
872 # TODO(crrev.com/c/1793030): remove no-level case once CL
873 # is pushed
874 continue
875 # Extract compressed logs if any.
876 compressed, extracted = self._extract_compressed_logs(log_dir,
877 files)
878 files = list(set(files) - set(compressed))
879 files.extend(extracted)
880 # Need to sort. As they all share the same timestamp, and
881 # loglevel, the index itself is sufficient. The highest index
882 # is the oldest file, therefore we need a descending sort.
883 def sortkey(f, level=level_name):
884 """Custom sortkey to sort based on rotation number int."""
885 if f.endswith(level_name): return 0
886 return int(f.split('.')[-1])
887
888 files.sort(reverse=True, key=sortkey)
889 # Just rename the first file rather than building from scratch.
890 os.rename(files[0], joint_path)
891 with open(joint_path, 'a') as joint_f:
892 for logfile in files[1:]:
893 # Transfer the file to the joint file line by line.
894 with open(logfile, 'r') as log_f:
895 for line in log_f:
896 joint_f.write(line)
897 # File has been written over. Delete safely.
898 os.remove(logfile)
899 # Need to remove all files form |local_files| so we don't
900 # analyze them again.
901 local_files = list(set(local_files) - set(files) - set(compressed))
902 # Lastly, extract MCU logs from the joint logs.
903 self._extract_mcu_logs(log_dir)
904
Garry Wang79e9af62019-06-12 15:19:19 -0700905 def _lock(self):
906 """lock servohost by touching a file.
907 """
908 logging.debug('Locking servohost %s by touching %s file',
909 self.hostname, self._lock_file)
910 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700911 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700912
913
914 def _unlock(self):
915 """Unlock servohost by removing the lock file.
916 """
917 logging.debug('Unlocking servohost by removing %s file',
918 self._lock_file)
919 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700920 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700921
922
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700923 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -0700924 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700925 # NOTE: throughout this method there are multiple attempts to catch
926 # all errors. This is WAI as log grabbing should not fail tests.
927 # However, the goal is to catch and handle/process all errors, thus
928 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800929 if self._closed:
930 logging.debug('ServoHost is already closed.')
931 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700932 instance_ts = self.get_instance_logs_ts()
933 # TODO(crbug.com/1011516): once enabled, remove the check against
934 # localhost and instead check against log-rotiation enablement.
935 logs_available = (instance_ts is not None and
936 self.job and
937 not self.is_localhost())
938 if logs_available:
939 # Probe whether there was a servod restart, and grab those old
940 # logs as well.
941 try:
942 self.probe_servod_restart(instance_ts, self.job.resultdir)
943 except (error.AutoservRunError, error.TestFail) as e:
944 logging.info('Failed to grab servo logs due to: %s. '
945 'This error is forgiven.', str(e))
946 except Exception as e:
947 logging.error('Unexpected error probing for old logs. %s. '
948 'Forgiven. Please file a bug and fix or catch '
949 'in log probing function', str(e),
950 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700951 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800952 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -0700953 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800954 self._servo.close(outdir)
955
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700956 if logs_available:
957 # Grab current (not old like above) logs after the servo instance
958 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800959 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700960 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800961 except error.AutoservRunError as e:
962 logging.info('Failed to grab servo logs due to: %s. '
963 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700964 except Exception as e:
965 logging.error('Unexpected error grabbing servod logs. %s. '
966 'Forgiven. Please file a bug and fix or catch '
967 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700968
Garry Wang7c00b0f2019-06-25 17:28:17 -0700969 if self._is_locked:
970 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -0700971 try:
972 self._unlock()
973 except error.AutoservSSHTimeout:
974 logging.error('Unlock servohost failed due to ssh timeout.'
975 ' It may caused by servohost went down during'
976 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -0800977 # We want always stop servod after task to minimum the impact of bad
978 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -0800979 try:
980 self.stop_servod()
981 except error.AutoservRunError as e:
982 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800983 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -0800984
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700985 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800986 # Mark closed.
987 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700988
989
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800990 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -0700991 if self._servo_state is None:
Garry Wang11b5e872020-03-11 15:14:08 -0700992 return servo_constants.SERVO_STATE_UNKNOWN
Otabek Kasimova7ba91a2020-03-09 08:31:01 -0700993 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800994
995
Richard Barnetteea3e4602016-06-10 12:36:41 -0700996def make_servo_hostname(dut_hostname):
997 """Given a DUT's hostname, return the hostname of its servo.
998
999 @param dut_hostname: hostname of a DUT.
1000
1001 @return hostname of the DUT's servo.
1002
1003 """
1004 host_parts = dut_hostname.split('.')
1005 host_parts[0] = host_parts[0] + '-servo'
1006 return '.'.join(host_parts)
1007
1008
1009def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001010 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001011
1012 @param servo_hostname: hostname of the servo host.
1013
1014 @return True if it's up, False otherwise
1015 """
1016 # Technically, this duplicates the SSH ping done early in the servo
1017 # proxy initialization code. However, this ping ends in a couple
1018 # seconds when if fails, rather than the 60 seconds it takes to decide
1019 # that an SSH ping has timed out. Specifically, that timeout happens
1020 # when our servo DNS name resolves, but there is no host at that IP.
1021 logging.info('Pinging servo host at %s', servo_hostname)
1022 ping_config = ping_runner.PingConfig(
1023 servo_hostname, count=3,
1024 ignore_result=True, ignore_status=True)
1025 return ping_runner.PingRunner().ping(ping_config).received > 0
1026
1027
Richard Barnettee519dcd2016-08-15 17:37:17 -07001028def _map_afe_board_to_servo_board(afe_board):
1029 """Map a board we get from the AFE to a servo appropriate value.
1030
1031 Many boards are identical to other boards for servo's purposes.
1032 This function makes that mapping.
1033
1034 @param afe_board string board name received from AFE.
1035 @return board we expect servo to have.
1036
1037 """
1038 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1039 BOARD_MAP = {'gizmo': 'panther'}
1040 mapped_board = afe_board
1041 if afe_board in BOARD_MAP:
1042 mapped_board = BOARD_MAP[afe_board]
1043 else:
1044 for suffix in KNOWN_SUFFIXES:
1045 if afe_board.endswith(suffix):
1046 mapped_board = afe_board[0:-len(suffix)]
1047 break
1048 if mapped_board != afe_board:
1049 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1050 return mapped_board
1051
1052
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001053def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001054 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001055
Richard Barnetteea3e4602016-06-10 12:36:41 -07001056 @param dut_host Instance of `Host` on which to find the servo
1057 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001058 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001059 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001060 info = dut_host.host_info_store.get()
1061 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001062 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001063
Garry Wang11b5e872020-03-11 15:14:08 -07001064 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001065 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001066 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1067 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001068 except ValueError:
1069 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001070 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001071 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001072 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001073
1074 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001075 servo_board = _map_afe_board_to_servo_board(info.board)
1076 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001077 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001078 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1079 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001080
1081
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001082def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001083 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1084 in ['localhost', '127.0.0.1']):
1085 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001086 'SSP', 'host_container_ip', type=str, default=None)
1087
1088
Dan Shi023aae32016-05-25 11:13:01 -07001089def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001090 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001091 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001092
Richard Barnette9a26ad62016-06-10 12:03:08 -07001093 This function attempts to create and verify or repair a `ServoHost`
1094 object for a servo connected to the given `dut`, subject to various
1095 constraints imposed by the parameters:
1096 * When the `servo_args` parameter is not `None`, a servo
1097 host must be created, and must be checked with `repair()`.
1098 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1099 true:
1100 * If `try_servo_repair` is true, then create a servo host and
1101 check it with `repair()`.
1102 * Otherwise, if the servo responds to `ping` then create a
1103 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001104
Richard Barnette9a26ad62016-06-10 12:03:08 -07001105 In cases where `servo_args` was not `None`, repair failure
1106 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001107 are logged and then discarded. Note that this only happens in cases
1108 where we're called from a test (not special task) control file that
1109 has an explicit dependency on servo. In that case, we require that
1110 repair not write to `status.log`, so as to avoid polluting test
1111 results.
1112
1113 TODO(jrbarnette): The special handling for servo in test control
1114 files is a thorn in my flesh; I dearly hope to see it cut out before
1115 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001116
1117 Parameters for a servo host consist of a host name, port number, and
1118 DUT board, and are determined from one of these sources, in order of
1119 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001120 * Servo attributes from the `dut` parameter take precedence over
1121 all other sources of information.
1122 * If a DNS entry for the servo based on the DUT hostname exists in
1123 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001124 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001125 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001126 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001127
1128 @param dut An instance of `Host` from which to take
1129 servo parameters (if available).
1130 @param servo_args A dictionary with servo parameters to use if
1131 they can't be found from `dut`. If this
1132 argument is supplied, unrepaired exceptions
1133 from `verify()` will be passed back to the
1134 caller.
1135 @param try_lab_servo If not true, servo host creation will be
1136 skipped unless otherwise required by the
1137 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001138 @param try_servo_repair If true, check a servo host with
1139 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -08001140
1141 @returns: A ServoHost object or None. See comments above.
1142
1143 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001144 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001145 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001146 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001147 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001148 if utils.in_moblab_ssp():
1149 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001150 logging.debug(
1151 'Overriding provided servo_args (%s) with arguments'
1152 ' determined from the host (%s)',
1153 servo_args,
1154 servo_args_override,
1155 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001156 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001157
Richard Barnetteea3e4602016-06-10 12:36:41 -07001158 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001159 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001160 if try_lab_servo or servo_dependency:
Garry Wang11b5e872020-03-11 15:14:08 -07001161 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001162 else:
1163 # For regular test case which not required the servo
1164 return None, None
1165
Garry Wang11b5e872020-03-11 15:14:08 -07001166 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1167 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001168 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1169 logging.debug(
1170 'Servo connection info missed hostname: %s , port: %s',
1171 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001172 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001173 if not is_servo_host_information_valid(servo_hostname, servo_port):
1174 logging.debug(
1175 'Servo connection info is incorrect hostname: %s , port: %s',
1176 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001177 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001178 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001179 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001180 logging.debug('ServoHost is not up.')
Garry Wang11b5e872020-03-11 15:14:08 -07001181 return None, servo_constants.SERVO_STATE_BROKEN
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001182
Garry Wangebc015b2019-06-06 17:45:06 -07001183 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001184
1185 # Reset servo if the servo is locked, as we check if the servohost is up,
1186 # if the servohost is labstation and if the servohost is in lab inside the
1187 # locking logic. Also check try_servo_repair to make sure we only do this
1188 # in AdminRepair tasks.
1189 if newhost._is_locked and try_servo_repair:
1190 newhost.reset_servo()
1191
Garry Wangcdd27b22020-01-13 14:59:11 -08001192 try:
1193 newhost.restart_servod(quick_startup=True)
1194 except error.AutoservSSHTimeout:
1195 logging.warning("Restart servod failed due ssh connection "
1196 "to servohost timed out. This error is forgiven"
1197 " here, we will retry in servo repair process.")
1198 except error.AutoservRunError as e:
1199 logging.warning("Restart servod failed due to:\n%s\n"
1200 "This error is forgiven here, we will retry"
1201 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001202
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001203 # TODO(gregorynisbet): Clean all of this up.
1204 logging.debug('create_servo_host: attempt to set info store on '
1205 'servo host')
1206 try:
1207 if dut_host_info is None:
1208 logging.debug('create_servo_host: dut_host_info is '
1209 'None, skipping')
1210 else:
1211 newhost.set_dut_host_info(dut_host_info)
1212 logging.debug('create_servo_host: successfully set info '
1213 'store')
1214 except Exception:
1215 logging.error("create_servo_host: (%s)", traceback.format_exc())
1216
Richard Barnette9a26ad62016-06-10 12:03:08 -07001217 # Note that the logic of repair() includes everything done
1218 # by verify(). It's sufficient to call one or the other;
1219 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001220 if servo_dependency:
1221 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001222 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001223
1224 if try_servo_repair:
1225 try:
1226 newhost.repair()
1227 except Exception:
1228 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001229 else:
1230 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001231 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001232 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001233 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001234 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001235
1236
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001237def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001238 if hostname is None or len(hostname.strip()) == 0:
1239 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001240 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001241 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001242 if not type(port) is int:
1243 try:
1244 int(port)
1245 except ValueError:
1246 return False
1247
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001248 return True
1249
1250
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001251def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001252 """Check if provided servo attributes are valid.
1253
1254 @param hostname Hostname of the servohost.
1255 @param port servo port number.
1256
1257 @returns: A bool value to indicate if provided servo attribute valid.
1258 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001259 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001260 return False
1261 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001262 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001263 if port_int < 1 or port_int > 65000:
1264 return False
1265 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001266 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001267 return False
1268 return True