blob: 56bb32e6928a4391b1ae1a874fc142444b1e830a [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070019import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070020
21from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070022from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070024from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070025from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070027from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000028from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070029from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070030from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070031from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070032from autotest_lib.client.common_lib import global_config
Garry Wangd7367482020-02-27 13:52:40 -080033
Otabek Kasimov15963492020-06-23 21:10:51 -070034try:
35 from chromite.lib import metrics
36except ImportError:
37 metrics = utils.metrics_mock
38
Dan Shi3b2adf62015-09-02 17:46:54 -070039_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070040
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070041
Garry Wangebc015b2019-06-06 17:45:06 -070042class ServoHost(base_servohost.BaseServoHost):
43 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070044 that with a servo instance for a specific port.
45
46 @type _servo: servo.Servo | None
47 """
Fang Deng5d518f42013-08-02 14:04:32 -070048
Raul E Rangel52ca2e82018-07-03 14:10:14 -060049 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070050
Dan Shie5b3c512014-08-21 12:12:09 -070051 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070052 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070053
xixuan6cf6d2f2016-01-29 15:29:00 -080054 # Ready test function
55 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070056
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080057 # Directory prefix on the servo host where the servod logs are stored.
58 SERVOD_LOG_PREFIX = '/var/log/servod'
59
60 # Exit code to use when symlinks for servod logs are not found.
61 NO_SYMLINKS_CODE = 9
62
63 # Directory in the job's results directory to dump the logs into.
64 LOG_DIR = 'servod'
65
66 # Prefix for joint loglevel files in the logs.
67 JOINT_LOG_PREFIX = 'log'
68
69 # Regex group to extract timestamp from logfile name.
70 TS_GROUP = 'ts'
71
72 # This regex is used to extract the timestamp from servod logs.
73 # files always start with log.
74 TS_RE = (r'log.'
75 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
76 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
77 # The loglevel is optional depending on labstation version.
78 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
79 TS_EXTRACTOR = re.compile(TS_RE)
80
81 # Regex group to extract MCU name from logline in servod logs.
82 MCU_GROUP = 'mcu'
83
84 # Regex group to extract logline from MCU logline in servod logs.
85 LINE_GROUP = 'line'
86
87 # This regex is used to extract the mcu and the line content from an
88 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
89 # Here is an example log-line:
90 #
91 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
92 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
93 #
94 # Here is conceptually how they are formatted:
95 #
96 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
97 # <output>
98 #
99 # The log format starts with a timestamp
100 MCU_RE = (r'[\d\-]+ [\d:,]+ '
101 # The mcu that is logging this is next.
102 r'- (?P<%s>\w+) - '
103 # Next, we have more log outputs before the actual line.
104 # Information about the file line, logging function etc.
105 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
106 # NOTE: if the log format changes, this regex needs to be
107 # adjusted.
108 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
109 # Lastly, we get the MCU's console line.
110 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
111 MCU_EXTRACTOR = re.compile(MCU_RE)
112
113 # Suffix to identify compressed logfiles.
114 COMPRESSION_SUFFIX = '.tbz2'
115
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700116 # A suffix to mark servod log directories that came from instance that
117 # ran during this servo_host, but are not the last one running e.g. when
118 # an instance (on purpose, or due to a bug) restarted in the middle of the
119 # run.
120 OLD_LOG_SUFFIX = 'old'
121
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700122 # Mapping servo board with their vid-pid
123 SERVO_VID_PID = {
124 'servo_v4':'18d1:501b',
125 'ccd_cr50':'18d1:5014',
126 'servo_micro':'18d1:501a',
127 'servo_v3':['18d1:5004', '0403:6014'],
128 }
129
Otabek Kasimov15963492020-06-23 21:10:51 -0700130 # States of verifiers
131 # True - verifier run and passed
132 # False - verifier run and failed
133 # None - verifier did not run or dependency failed
134 VERIFY_SUCCESS = True
135 VERIFY_FAILED = False
136 VERIFY_NOT_RUN = None
137
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800138 def _init_attributes(self):
139 self._servo_state = None
140 self.servo_port = None
141 self.servo_board = None
142 self.servo_model = None
143 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700144 # The flag that indicate if a servo is connected to a smart usbhub.
145 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
146 # get replaced.
147 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800148 self._servo = None
149 self._servod_server_proxy = None
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700150 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800151 # Flag to make sure that multiple calls to close do not result in the
152 # logic executing multiple times.
153 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700154
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700155 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700156 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700157 servo_model=None, servo_serial=None, is_in_lab=None,
158 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700159 """Initialize a ServoHost instance.
160
161 A ServoHost instance represents a host that controls a servo.
162
163 @param servo_host: Name of the host where the servod process
164 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600165 @param servo_port: Port the servod process is listening on. Defaults
166 to the SERVOD_PORT environment variable if set,
167 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700168 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700169 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800170 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
171 to None, for which utils.host_is_in_lab_zone will be
172 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700173
174 """
175 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700176 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800177 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700178 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700179 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700180 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700181 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800182
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800183 # The location of the log files on the servo host for this instance.
184 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
185 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700186 # Path of the servo host lock file.
187 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
188 + self.LOCK_FILE_POSTFIX)
189 # File path to declare a reboot request.
190 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
191 + self.REBOOT_FILE_POSTFIX)
192
193 # Lock the servo host if it's an in-lab labstation to prevent other
194 # task to reboot it until current task completes. We also wait and
195 # make sure the labstation is up here, in the case of the labstation is
196 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700197 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700198 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
199 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700200 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700201
Richard Barnette9a26ad62016-06-10 12:03:08 -0700202 self._repair_strategy = (
203 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700204
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700205 def __str__(self):
206 return "<%s '%s:%s'>" % (
207 type(self).__name__, self.hostname, self.servo_port)
208
Richard Barnette9a26ad62016-06-10 12:03:08 -0700209 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700210 """ Initialize and setup servo for later use.
211 """
212 self.initilize_servo()
213 self.initialize_dut_for_servo()
214
215
216 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700217 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700218
219 Initializes `self._servo` and then verifies that all network
220 connections are working. This will create an ssh tunnel if
221 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700222 """
223 self._servo = servo.Servo(servo_host=self,
224 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700225
Garry Wang8c8dc972020-06-09 13:41:51 -0700226
227 def initialize_dut_for_servo(self):
228 """This method will do some setup for dut control, e.g. setup
229 main servo_v4 device, and also testing the connection between servo
230 and DUT. As a side effect of testing the connection, all signals on
231 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700232 set to the neutral (off) position.
233 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700234 if not self._servo:
235 raise hosts.AutoservVerifyError('Servo object needs to be'
236 ' initialized before initialize'
237 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700238 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700239 self._servo.initialize_dut,
240 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700241 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700242 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
243 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700244
245
246 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700247 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700248
249 If we've previously successfully connected to our servo,
250 disconnect any established ssh tunnel, and set `self._servo`
251 back to `None`.
252 """
253 if self._servo:
254 # N.B. This call is safe even without a tunnel:
255 # rpc_server_tracker.disconnect() silently ignores
256 # unknown ports.
257 self.rpc_server_tracker.disconnect(self.servo_port)
258 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700259
Garry Wangc1288cf2019-12-17 14:58:00 -0800260
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800261 def _create_servod_server_proxy(self):
262 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700263
264 @returns: An xmlrpclib.ServerProxy that is connected to the servod
265 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700266 """
Garry Wang11b5e872020-03-11 15:14:08 -0700267 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
268 and not self.is_localhost()):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700269 return self.rpc_server_tracker.xmlrpc_connect(
270 None, self.servo_port,
271 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800272 timeout_seconds=60,
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700273 request_timeout_seconds=3600,
274 server_desc=str(self))
Richard Barnette9a26ad62016-06-10 12:03:08 -0700275 else:
276 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
277 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700278
279
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800280 def get_servod_server_proxy(self):
281 """Return a cached proxy if exists; otherwise, create a new one.
282
283 @returns: An xmlrpclib.ServerProxy that is connected to the servod
284 server on the host.
285 """
286 # Single-threaded execution, no race
287 if self._servod_server_proxy is None:
288 self._servod_server_proxy = self._create_servod_server_proxy()
289 return self._servod_server_proxy
290
291
Richard Barnette1edbb162016-11-01 11:47:50 -0700292 def verify(self, silent=False):
293 """Update the servo host and verify it's in a good state.
294
295 @param silent If true, suppress logging in `status.log`.
296 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700297 message = 'Beginning verify for servo host %s port %s serial %s'
298 message %= (self.hostname, self.servo_port, self.servo_serial)
299 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700300 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700301 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700302 self._servo_state = servo_constants.SERVO_STATE_WORKING
303 self.record('INFO', None, None,
304 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700305 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700306 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700307 self._servo_state = self.determine_servo_state()
308 self.record('INFO', None, None,
309 'ServoHost verify set servo_state as %s'
310 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700311 if self._is_critical_error(e):
312 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700313
314
Garry Wang7b0e1b72020-03-25 19:08:59 -0700315 def get_image_name_from_usbkey(self, usbkey_dev):
316 """Mount usb drive and check ChromeOS image name on it if there is
317 one. This method assumes the image_usbkey_direction is already set
318 to servo side.
319
320 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
321
322 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
323 or empty string if no test image detected, or unexpected
324 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700325 """
Garry Wang70e5d062020-04-03 18:01:05 -0700326 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700327 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
328 unmount_cmd = 'umount %s' % usb_mount_path
329 # ChromeOS root fs is in /dev/sdx3
330 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
331 # Unmount if there is an existing stale mount.
332 self.run(unmount_cmd, ignore_status=True)
333 # Create if the mount point is not existing.
334 self.run('mkdir -p %s' % usb_mount_path)
335 try:
336 # Attempt to mount the usb drive.
337 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700338 if mount_result.exit_status != 0:
339 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700340 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700341
342 release_content = self.run(
343 'cat %s/etc/lsb-release' % usb_mount_path,
344 ignore_status=True).stdout.strip()
345
346 if not re.search(r'RELEASE_TRACK=.*test', release_content):
347 logging.info('The image on usbkey is not a test image')
348 return ''
349
350 return lsbrelease_utils.get_chromeos_release_builder_path(
351 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700352 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700353 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700354 self.run(unmount_cmd, ignore_status=True)
355
356
Garry Wang70e5d062020-04-03 18:01:05 -0700357 def _probe_and_validate_usb_dev(self):
358 """This method probe the usb dev path by talking to servo, and then
359 validate the dev path is valid block device to servohost.
360 Possible output:
361 1. Encounter error during probe usb dev, returns empty string.
362 2. probe usb dev completed without error but cannot find usb dev,
363 raise AutoservRepairError.
364 3. probe usb dev find a usb dev path, but failed validation in this
365 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700366
Garry Wang70e5d062020-04-03 18:01:05 -0700367 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
368 if unexpected error occurred during probe.
369 @raises: AutoservRepairError if servo couldn't probe the usb dev path
370 (servo.probe_host_usb_dev() returns empty string), or the dev path is
371 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700372 """
373 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700374 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700375 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700376 except Exception as e:
377 # We don't want any unexpected or transient servo communicating
378 # failure block usb repair, so capture all errors here.
379 logging.error(e, exc_info=True)
380 logging.error('Unexpected error occurred on get usbkey dev path,'
381 ' skipping usbkey validation.')
382 return ''
383
Garry Wang70e5d062020-04-03 18:01:05 -0700384 if usb_dev:
385 # probe_host_usb_dev() sometimes return stale record,
386 # so we need to make sure the path exists in fdisk.
387 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700388 try:
389 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
390 if resp.exit_status == 0:
391 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700392
Garry Wang11441182020-06-16 18:34:14 -0700393 logging.error('%s is reported from "image_usbkey_dev" control'
394 ' but not detected by fdisk!', usb_dev)
395 except error.AutoservRunError as e:
396 if 'Timeout encountered' in str(e):
397 logging.warning('Timeout encountered during fdisk run,'
398 ' skipping usbkey validation.')
399 return ''
400 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700401
402 raise hosts.AutoservRepairError(
403 'No usbkey detected on servo, the usbkey may be either missing'
404 ' or broken. Please replace usbkey on the servo and retry.',
405 'missing usbkey')
406
407
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700408 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700409 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700410 if self.servo_board:
411 try:
412 frm_config = config.Config(self.servo_board, self.servo_model)
413 return frm_config.chrome_ec
414 except Exception as e:
415 logging.error('Unexpected error when read from firmware'
416 ' configs; %s', str(e))
417 return False
418
Garry Wang70e5d062020-04-03 18:01:05 -0700419 def validate_image_usbkey(self):
420 """This method first validate if there is a recover usbkey on servo
421 that accessible to servohost, and second check if a ChromeOS image is
422 already on the usb drive and return the image_name so we can avoid
423 unnecessary download and flash to the recover usbkey on servo.
424
425 Please note that, there is special error handling logic here:
426 1. If unexpected error happens, we return empty string. So repair
427 actions will not get blocked.
428 2. If no working usbkey present on servo, but no errors, we'll raise
429 AutoservRepairError here.
430
431 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
432 or empty string if no test image detected, or unexpected
433 error occurred.
434 @raises: AutoservRepairError if the usbkey is not detected on servo.
435 """
436 usb_dev = self._probe_and_validate_usb_dev()
437 if usb_dev:
438 return self.get_image_name_from_usbkey(usb_dev)
439 else:
440 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700441
442
Richard Barnette1edbb162016-11-01 11:47:50 -0700443 def repair(self, silent=False):
444 """Attempt to repair servo host.
445
446 @param silent If true, suppress logging in `status.log`.
447 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700448 message = 'Beginning repair for servo host %s port %s serial %s'
449 message %= (self.hostname, self.servo_port, self.servo_serial)
450 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700451 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700452 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700453 self._servo_state = servo_constants.SERVO_STATE_WORKING
454 self.record('INFO', None, None,
455 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700456 # If target is a labstation then try to withdraw any existing
457 # reboot request created by this servo because it passed repair.
458 if self.is_labstation():
459 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700460 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700461 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700462 self._servo_state = self.determine_servo_state()
463 self.record('INFO', None, None,
464 'ServoHost repair set servo_state as %s'
465 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700466 if self._is_critical_error(e):
467 self.disconnect_servo()
468 self.stop_servod()
469 raise
470
471
472 def _is_critical_error(self, error):
473 if (isinstance(error, hosts.AutoservVerifyDependencyError)
474 and not error.is_critical()):
475 logging.warning('Non-critical verify failure(s) detected during'
476 ' verify/repair servo, servo connection will'
477 ' still up but may not fully functional.'
478 ' Some repair actions and servo depended'
479 ' tests may not run.')
480 return False
481 logging.info('Critical verify failure(s) detected during repair/verify'
482 ' servo. Disconnecting servo and stop servod, all repair '
483 'action and tests that depends on servo will not run.')
484 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700485
486
Dan Shi4d478522014-02-14 13:46:32 -0800487 def get_servo(self):
488 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700489
Dan Shi4d478522014-02-14 13:46:32 -0800490 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700491 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700492 """
Dan Shi4d478522014-02-14 13:46:32 -0800493 return self._servo
494
495
Garry Wang79e9af62019-06-12 15:19:19 -0700496 def request_reboot(self):
497 """Request servohost to be rebooted when it's safe to by touch a file.
498 """
499 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700500 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700501 self.run('touch %s' % self._reboot_file, ignore_status=True)
502
503
Garry Wang464ff1e2019-07-18 17:20:34 -0700504 def withdraw_reboot_request(self):
505 """Withdraw a servohost reboot request if exists by remove the flag
506 file.
507 """
508 logging.debug('Withdrawing request to reboot servohost %s that created'
509 ' by servo with port # %s if exists.',
510 self.hostname, self.servo_port)
511 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
512
513
Garry Wangc1288cf2019-12-17 14:58:00 -0800514 def start_servod(self, quick_startup=False):
515 """Start the servod process on servohost.
516 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800517 # Skip if running on the localhost.(crbug.com/1038168)
518 if self.is_localhost():
519 logging.debug("Servohost is a localhost, skipping start servod.")
520 return
521
522 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800523 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800524 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800525 if self.servo_model:
526 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800527 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800528 logging.warning('Board for DUT is unknown; starting servod'
529 ' assuming a pre-configured board.')
530
531 cmd += ' PORT=%d' % self.servo_port
532 if self.servo_serial:
533 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800534
535 # Start servod with dual_v4 if the DUT/servo from designated pools.
536 dut_host_info = self.get_dut_host_info()
537 if dut_host_info:
Garry Wang11b5e872020-03-11 15:14:08 -0700538 if bool(dut_host_info.pools &
539 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800540 logging.debug('The DUT is detected in following designated'
541 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700542 servo_constants.POOLS_SUPPORT_DUAL_V4)
Garry Wangd7367482020-02-27 13:52:40 -0800543 cmd += ' DUAL_V4=1'
544
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800545 # Remove the symbolic links from the logs. This helps ensure that
546 # a failed servod instantiation does not cause us to grab old logs
547 # by mistake.
548 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800549 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800550
551 # There's a lag between when `start servod` completes and when
552 # the _ServodConnectionVerifier trigger can actually succeed.
553 # The call to time.sleep() below gives time to make sure that
554 # the trigger won't fail after we return.
555
556 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
557 # But in the rare case all servo on a labstation are in heavy use they
558 # may take ~30 seconds. So the timeout value will double these value,
559 # and we'll try quick start up when first time initialize servohost,
560 # and use standard start up timeout in repair.
561 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700562 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800563 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700564 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800565 logging.debug('Wait %s seconds for servod process fully up.', timeout)
566 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700567 # Cache the initial instance timestamp to check against servod restarts
568 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800569
570
571 def stop_servod(self):
572 """Stop the servod process on servohost.
573 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800574 # Skip if running on the localhost.(crbug.com/1038168)
575 if self.is_localhost():
576 logging.debug("Servohost is a localhost, skipping stop servod.")
577 return
578
Garry Wangc1288cf2019-12-17 14:58:00 -0800579 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800580 self.run('stop servod PORT=%d' % self.servo_port,
581 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800582 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700583 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
584 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800585
586
587 def restart_servod(self, quick_startup=False):
588 """Restart the servod process on servohost.
589 """
590 self.stop_servod()
591 self.start_servod(quick_startup)
592
Garry Wang11b5e872020-03-11 15:14:08 -0700593
Garry Wangffbd2162020-04-17 16:13:48 -0700594 def _process_servodtool_error(self, response):
595 """Helper function to handle non-zero servodtool response.
596 """
597 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700598 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700599 logging.error('The servo is not plugged on a usb hub that supports'
600 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700601 # change the flag so we can update this label in later process.
602 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700603 return
604
605 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
606 self.servo_serial, response.stdout):
607 logging.error('No servo with serial %s found!', self.servo_serial)
608 return
609
610 logging.error('Unexpected error occurred from usbhub control, please'
611 ' file a bug and inform chrome-fleet-software@ team!')
612
613
614 def _get_servo_usb_devnum(self):
615 """Helper function to collect current usb devnum of servo.
616 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700617 # TODO remove try-except when fix crbug.com/1087964
618 try:
619 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
620 resp = self.run(cmd, ignore_status=True, timeout=30)
621 except Exception as e:
622 # Here we catch only timeout errors.
623 # Other errors is filtered by ignore_status=True
624 logging.debug('Attempt to get servo usb-path failed due to '
625 'timeout; %s', e)
626 return ''
627
Garry Wangffbd2162020-04-17 16:13:48 -0700628 if resp.exit_status != 0:
629 self._process_servodtool_error(resp)
630 return ''
631 usb_path = resp.stdout.strip()
632 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
633
634 resp = self.run('cat %s/devnum' % usb_path,
635 ignore_status=True)
636 if resp.exit_status != 0:
637 self._process_servodtool_error(resp)
638 return ''
639 return resp.stdout.strip()
640
641
642 def _reset_servo(self):
643 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700644 # TODO remove try-except when fix crbug.com/1087964
645 try:
646 resp = self.run('servodtool device -s %s power-cycle' %
647 self.servo_serial, ignore_status=True,
648 timeout=30)
649 if resp.exit_status != 0:
650 self._process_servodtool_error(resp)
651 return False
652 except Exception as e:
653 # Here we catch only timeout errors.
654 # Other errors is filtered by ignore_status=True
655 logging.debug('Attempt to reset servo failed due to timeout;'
656 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700657 return False
658
659 logging.debug('Wait %s seconds for servo to come back from reset.',
660 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
661 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700662 # change the flag so we can update this label in later process.
663 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700664 return True
665
666
667 def reset_servo(self):
668 """Reset(power-cycle) the servo via smart usbhub.
669 """
670 if not self.is_labstation():
671 logging.info('Servo reset is not applicable to servo_v3.')
672 return
673
674 pre_reset_devnum = self._get_servo_usb_devnum()
675 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
676 result = self._reset_servo()
677 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700678 message = ('Failed to reset servo with serial: %s. (Please ignore'
679 ' this error if the DUT is not connected to a smart'
680 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700681 logging.warning(message)
682 self.record('INFO', None, None, message)
683 return
684
685 post_reset_devnum = self._get_servo_usb_devnum()
686 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
687 if not (pre_reset_devnum and post_reset_devnum):
688 message = ('Servo reset completed but unable to verify'
689 ' devnum change!')
690 elif pre_reset_devnum != post_reset_devnum:
691 message = ('Reset servo with serial %s completed successfully!'
692 % self.servo_serial)
693 else:
694 message = 'Servo reset completed but devnum is still not changed!'
695 logging.info(message)
696 self.record('INFO', None, None, message)
697
698
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800699 def _extract_compressed_logs(self, logdir, relevant_files):
700 """Decompress servod logs in |logdir|.
701
702 @param logdir: directory containing compressed servod logs.
703 @param relevant_files: list of files in |logdir| to consider.
704
705 @returns: tuple, (tarfiles, files) where
706 tarfiles: list of the compressed filenames that have been
707 extracted and deleted
708 files: list of the uncompressed files that were generated
709 """
710 # For all tar-files, first extract them to the directory, and
711 # then let the common flow handle them.
712 tarfiles = [cf for cf in relevant_files if
713 cf.endswith(self.COMPRESSION_SUFFIX)]
714 files = []
715 for f in tarfiles:
716 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
717 with tarfile.open(f) as tf:
718 # Each tarfile has only one member, as
719 # that's the compressed log.
720 member = tf.members[0]
721 # Manipulate so that it only extracts the basename, and not
722 # the directories etc.
723 member.name = norm_name
724 files.append(os.path.join(logdir, member.name))
725 tf.extract(member, logdir)
726 # File has been extracted: remove the compressed file.
727 os.remove(f)
728 return tarfiles, files
729
730 def _extract_mcu_logs(self, log_subdir):
731 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
732
733 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
734 lines from the logs to generate invidiual console logs e.g. after
735 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
736 those MCUs had any console input/output.
737
738 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
739 """
740 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
741 # files
742 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
743 if not os.path.exists(mcu_lines_file):
744 logging.info('No DEBUG logs found to extract MCU logs from.')
745 return
746 mcu_files = {}
747 mcu_file_template = '%s.txt'
748 with open(mcu_lines_file, 'r') as f:
749 for line in f:
750 match = self.MCU_EXTRACTOR.match(line)
751 if match:
752 mcu = match.group(self.MCU_GROUP).lower()
753 line = match.group(self.LINE_GROUP)
754 if mcu not in mcu_files:
755 mcu_file = os.path.join(log_subdir,
756 mcu_file_template % mcu)
757 mcu_files[mcu] = open(mcu_file, 'a')
758 fd = mcu_files[mcu]
759 fd.write(line + '\n')
760 for f in mcu_files:
761 mcu_files[f].close()
762
763
764 def remove_latest_log_symlinks(self):
765 """Remove the conveninence symlinks 'latest' servod logs."""
766 symlink_wildcard = '%s/latest*' % self.remote_log_dir
767 cmd = 'rm ' + symlink_wildcard
768 self.run(cmd, stderr_tee=None, ignore_status=True)
769
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700770 def probe_servod_restart(self, instance_ts, outdir):
771 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800772
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700773 If since the last time this host called start_servod() servod crashed
774 and restarted, this helper finds those logs as well, and stores them
775 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000776
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700777 It also issues a panicinfo command to servo devices after the restart
778 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800779
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700780 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800781 @param outdir: directory to create a subdirectory into to place the
782 servod logs into.
783 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700784 if self._initial_instance_ts is None:
785 logging.info('No log timestamp grabbed successfully on servod '
786 'startup. Cannot check device restarts. Ignoring.')
787 return
788 if instance_ts == self._initial_instance_ts:
789 logging.debug('Servod appears to have run without restarting')
790 return
791 # Servod seems to have restarted (at least once). |_initial_instance_ts|
792 # is the first timestamp, and instance_ts is the current timestamp. Find
793 # all timestamps in between them, and grab the logs for each.
794 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
795 instance_ts)
796 logging.info('Servod has restarted %d times between the start and the '
797 'end of this servo_host.', len(tss))
798 logging.info('This might be an issue. Will extract all logs from each '
799 'instance.')
800 logging.info('Logs that are not the currently running (about to turn '
801 'down) instance are maked with a .%s in their folder.',
802 self.OLD_LOG_SUFFIX)
803 for ts in tss:
804 self.get_instance_logs(ts, outdir, old=True)
805 # Lastly, servod has restarted due to a potential issue. Try to get
806 # panic information from servo micro and servo v4 for the current logs.
807 # This can only happen if the |_servo| attribute is initialized.
808 if self._servo:
809 for mcu in ['servo_micro', 'servo_v4']:
810 ctrl = '%s_uart_cmd' % mcu
811 if self._servo.has_control(ctrl):
812 logging.info('Trying to retrieve %r panicinfo into logs',
813 mcu)
814 try:
815 self._servo.set_nocheck(ctrl, 'panicinfo')
816 except error.TestFail as e:
817 logging.error('Failed to generate panicinfo for %r '
818 'logs. %s', mcu, str(e))
819
820 def _find_instance_timestamps_between(self, start_ts, end_ts):
821 """Find all log timestamps between [start_ts, end_ts).
822
823 @param start_ts: str, earliest log timestamp of interest
824 @param end_ts: str, latest log timestamp of interest
825
826 @returns: list, all timestamps between start_ts and end_ts, end_ts
827 exclusive, on the servo_host. An empty list on errors
828 """
829 # Simply get all timestamp, and then sort and remove
830 cmd = 'ls %s' % self.remote_log_dir
831 res = self.run(cmd, stderr_tee=None, ignore_status=True)
832 if res.exit_status != 0:
833 # Here we failed to find anything.
834 logging.info('Failed to find remote servod logs. Ignoring.')
835 return []
836 logfiles = res.stdout.strip().split()
837 timestamps = set()
838 for logfile in logfiles:
839 ts_match = self.TS_EXTRACTOR.match(logfile)
840 if not ts_match:
841 # Simply ignore files that fail the check. It might be the
842 # 'latest' symlinks or random files.
843 continue
844 timestamps.add(ts_match.group(self.TS_GROUP))
845 # At this point we have all unique timestamps.
846 timestamps = sorted(timestamps)
847 for ts in [start_ts, end_ts]:
848 if ts not in timestamps:
849 logging.error('Timestamp %r not in servod logs. Cannot query '
850 'for timestamps in between %r and %r', ts,
851 start_ts, end_ts)
852 return []
853 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
854
855 def get_instance_logs_ts(self):
856 """Retrieve the currently running servod instance's log timestamp
857
858 @returns: str, timestamp for current instance, or None on failure
859 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800860 # First, extract the timestamp. This cmd gives the real filename of
861 # the latest aka current log file.
862 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
863 'then realpath %(dir)s/latest.DEBUG;'
864 'elif [ -f %(dir)s/latest ];'
865 'then realpath %(dir)s/latest;'
866 'else exit %(code)d;'
867 'fi' % {'dir': self.remote_log_dir,
868 'code': self.NO_SYMLINKS_CODE})
869 res = self.run(cmd, stderr_tee=None, ignore_status=True)
870 if res.exit_status != 0:
871 if res.exit_status == self.NO_SYMLINKS_CODE:
872 logging.warning('servod log latest symlinks not found. '
873 'This is likely due to an error starting up '
874 'servod. Ignoring..')
875 else:
876 logging.warning('Failed to find servod logs on servo host.')
877 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700878 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800879 fname = os.path.basename(res.stdout.strip())
880 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800881 ts_match = self.TS_EXTRACTOR.match(fname)
882 if not ts_match:
883 logging.warning('Failed to extract timestamp from servod log file '
884 '%r. Skipping. The servo host is using outdated '
885 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700886 return None
887 return ts_match.group(self.TS_GROUP)
888
889 def get_instance_logs(self, instance_ts, outdir, old=False):
890 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
891
892 This method first collects all logs on the servo_host side pertaining
893 to this servod instance (port, instatiation). It glues them together
894 into combined log.[level].txt files and extracts all available MCU
895 console I/O from the logs into individual files e.g. servo_v4.txt
896
897 All the output can be found in a directory inside |outdir| that
898 this generates based on |LOG_DIR|, the servod port, and the instance
899 timestamp on the servo_host side.
900
901 @param instance_ts: log timestamp to grab logfiles for
902 @param outdir: directory to create a subdirectory into to place the
903 servod logs into.
904 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
905 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800906 # Create the local results log dir.
907 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
908 str(self.servo_port),
909 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700910 if old:
911 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
912 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800913 os.mkdir(log_dir)
914 # Now, get all files with that timestamp.
915 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
916 instance_ts)
917 res = self.run(cmd, stderr_tee=None, ignore_status=True)
918 files = res.stdout.strip().split()
919 try:
920 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700921 if not os.listdir(log_dir):
922 logging.info('No servod logs retrieved. Ignoring, and removing '
923 '%r again.', log_dir)
924 os.rmdir(log_dir)
925 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800926 except error.AutoservRunError as e:
927 result = e.result_obj
928 if result.exit_status != 0:
929 stderr = result.stderr.strip()
930 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
931 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700932 # Remove the log_dir as nothing was added to it.
933 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800934 return
935 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
936 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
937 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
938 # Create the joint files for each loglevel. i.e log.DEBUG
939 joint_file = self.JOINT_LOG_PREFIX
940 if level_name:
941 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
942 # This helps with some online tools to avoid complaints about an
943 # unknown filetype.
944 joint_file = joint_file + '.txt'
945 joint_path = os.path.join(log_dir, joint_file)
946 files = [f for f in local_files if level_name in f]
947 if not files:
948 # TODO(crrev.com/c/1793030): remove no-level case once CL
949 # is pushed
950 continue
951 # Extract compressed logs if any.
952 compressed, extracted = self._extract_compressed_logs(log_dir,
953 files)
954 files = list(set(files) - set(compressed))
955 files.extend(extracted)
956 # Need to sort. As they all share the same timestamp, and
957 # loglevel, the index itself is sufficient. The highest index
958 # is the oldest file, therefore we need a descending sort.
959 def sortkey(f, level=level_name):
960 """Custom sortkey to sort based on rotation number int."""
961 if f.endswith(level_name): return 0
962 return int(f.split('.')[-1])
963
964 files.sort(reverse=True, key=sortkey)
965 # Just rename the first file rather than building from scratch.
966 os.rename(files[0], joint_path)
967 with open(joint_path, 'a') as joint_f:
968 for logfile in files[1:]:
969 # Transfer the file to the joint file line by line.
970 with open(logfile, 'r') as log_f:
971 for line in log_f:
972 joint_f.write(line)
973 # File has been written over. Delete safely.
974 os.remove(logfile)
975 # Need to remove all files form |local_files| so we don't
976 # analyze them again.
977 local_files = list(set(local_files) - set(files) - set(compressed))
978 # Lastly, extract MCU logs from the joint logs.
979 self._extract_mcu_logs(log_dir)
980
Garry Wang79e9af62019-06-12 15:19:19 -0700981 def _lock(self):
982 """lock servohost by touching a file.
983 """
984 logging.debug('Locking servohost %s by touching %s file',
985 self.hostname, self._lock_file)
986 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700987 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700988
989
990 def _unlock(self):
991 """Unlock servohost by removing the lock file.
992 """
993 logging.debug('Unlocking servohost by removing %s file',
994 self._lock_file)
995 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700996 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700997
998
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700999 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001000 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001001 # NOTE: throughout this method there are multiple attempts to catch
1002 # all errors. This is WAI as log grabbing should not fail tests.
1003 # However, the goal is to catch and handle/process all errors, thus
1004 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001005 if self._closed:
1006 logging.debug('ServoHost is already closed.')
1007 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001008 instance_ts = self.get_instance_logs_ts()
1009 # TODO(crbug.com/1011516): once enabled, remove the check against
1010 # localhost and instead check against log-rotiation enablement.
1011 logs_available = (instance_ts is not None and
1012 self.job and
1013 not self.is_localhost())
1014 if logs_available:
1015 # Probe whether there was a servod restart, and grab those old
1016 # logs as well.
1017 try:
1018 self.probe_servod_restart(instance_ts, self.job.resultdir)
1019 except (error.AutoservRunError, error.TestFail) as e:
1020 logging.info('Failed to grab servo logs due to: %s. '
1021 'This error is forgiven.', str(e))
1022 except Exception as e:
1023 logging.error('Unexpected error probing for old logs. %s. '
1024 'Forgiven. Please file a bug and fix or catch '
1025 'in log probing function', str(e),
1026 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001027 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001028 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001029 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001030 self._servo.close(outdir)
1031
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001032 if logs_available:
1033 # Grab current (not old like above) logs after the servo instance
1034 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001035 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001036 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001037 except error.AutoservRunError as e:
1038 logging.info('Failed to grab servo logs due to: %s. '
1039 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001040 except Exception as e:
1041 logging.error('Unexpected error grabbing servod logs. %s. '
1042 'Forgiven. Please file a bug and fix or catch '
1043 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001044
Garry Wang7c00b0f2019-06-25 17:28:17 -07001045 if self._is_locked:
1046 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001047 try:
1048 self._unlock()
1049 except error.AutoservSSHTimeout:
1050 logging.error('Unlock servohost failed due to ssh timeout.'
1051 ' It may caused by servohost went down during'
1052 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001053 # We want always stop servod after task to minimum the impact of bad
1054 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001055 try:
1056 self.stop_servod()
1057 except error.AutoservRunError as e:
1058 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001059 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001060
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001061 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001062 # Mark closed.
1063 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001064
1065
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001066 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001067 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001068
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001069 def _get_host_metrics_data(self):
1070 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001071 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001072 'board': self.servo_board or ''}
1073
1074 def _is_servo_device_connected(self, servo_type, serial):
1075 """Check if device is connected to the labstation.
1076
1077 Works for all servo devices connected to the labstation.
1078 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1079
1080 @param servo_type: The type of servo device. Expecting value can be
1081 servo_v4 or servo_micro.
1082 @param serial: The serial number of the device to detect it.
1083 """
1084 vid_pid = self.SERVO_VID_PID.get(servo_type)
1085 if not vid_pid or not serial:
1086 # device cannot detected without VID/PID or serial number
1087 return False
1088 logging.debug('Started to detect %s', servo_type)
1089 try:
1090 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1091 result = self.run(cmd, ignore_status=True, timeout=30)
1092 if result.exit_status == 0 and result.stdout.strip():
1093 logging.debug('The %s is plugged in to the host.', servo_type)
1094 return True
1095 logging.debug('%s device is not detected; %s', servo_type, result)
1096 return False
1097 except Exception as e:
1098 # can be triggered by timeout issue due running the script
1099 metrics.Counter(
1100 'chromeos/autotest/repair/servo_detection/timeout'
1101 ).increment(fields=self._get_host_metrics_data())
1102 logging.error('%s device is not detected; %s', servo_type, str(e))
1103 return None
1104
1105 def _is_servo_board_present_on_servo_v3(self):
1106 """Check if servo board is detected on servo_v3"""
1107 vid_pids = self.SERVO_VID_PID['servo_v3']
1108 if not vid_pids or len(vid_pids) == 0:
1109 # device cannot detected without VID/PID
1110 return False
1111 logging.debug('Started to detect servo board on servo_v3')
1112 not_detected = 'The servo board is not detected on servo_v3'
1113 try:
1114 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1115 result = self.run(cmd, ignore_status=True, timeout=30)
1116 if result.exit_status == 0 and result.stdout.strip():
1117 logging.debug('The servo board is detected on servo_v3')
1118 return True
1119 logging.debug('%s; %s', not_detected, result)
1120 return False
1121 except Exception as e:
1122 # can be triggered by timeout issue due running the script
1123 metrics.Counter(
1124 'chromeos/autotest/repair/servo_detection/timeout'
1125 ).increment(fields=self._get_host_metrics_data())
1126 logging.error('%s; %s', not_detected, str(e))
1127 return None
1128
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001129 def _is_main_device_not_detected_on_servo_v4(self):
1130 """Check if servod cannot find main device on servo.
1131
1132 The check based on reading servod logs for servo_v4.
1133 """
1134 if not self._initial_instance_ts:
1135 # latest log not found
1136 return False
1137 logging.debug('latest log for servod created at %s',
1138 self._initial_instance_ts)
1139 try:
1140 log_created = calendar.timegm(time.strptime(
1141 self._initial_instance_ts,
1142 "%Y-%m-%d--%H-%M-%S.%f"))
1143 except ValueError as e:
1144 logging.debug('Cannot read time from log file name: %s',
1145 self._initial_instance_ts)
1146 return False
1147 min_time_created = calendar.timegm(time.gmtime())
1148 if min_time_created > log_created + 3600:
1149 # the log file is old we cannot use it
1150 logging.debug('log file was created more than hour ago, too old')
1151 return False
1152 logging.debug('latest log was created not longer then 1 hour ago')
1153
1154 # check if servod can detect main device by servo_v4
1155 message = 'ERROR - No servo micro or CCD detected for board'
1156 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1157 % (self.servo_port, self._initial_instance_ts, message))
1158 result = self.run(cmd, ignore_status=True)
1159 if result.stdout.strip():
1160 logging.info('Servod cannot detect main device on the servo; '
1161 'Can be caused by bad hardware of servo or '
1162 'issue on the DUT side.')
1163 return True
1164 logging.debug('The main device is detected')
1165 return False
1166
Otabek Kasimov15963492020-06-23 21:10:51 -07001167 def get_verify_state(self, tag):
1168 """Return the state of servo verifier.
1169
1170 @returns: bool or None
1171 """
1172 return self._repair_strategy.verifier_is_good(tag)
1173
1174 def determine_servo_state(self):
1175 """Determine servo state based on the failed verifier.
1176
1177 @returns: servo state value
1178 The state detecting based on first fail verifier or collecting of
1179 them.
1180 """
1181 ssh = self.get_verify_state('servo_ssh')
1182 disk_space = self.get_verify_state('disk_space')
1183 start_servod = self.get_verify_state('servod_job')
1184 create_servo = self.get_verify_state('servod_connection')
1185 init_servo = self.get_verify_state('servod_control')
1186 pwr_button = self.get_verify_state('pwr_button')
1187 lid_open = self.get_verify_state('lid_open')
1188 ec_board = self.get_verify_state('ec_board')
1189 ccd_testlab = self.get_verify_state('ccd_testlab')
1190
1191 if not ssh:
1192 return servo_constants.SERVO_STATE_NO_SSH
1193
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001194 if start_servod == self.VERIFY_FAILED:
1195 # can be cause if device is not connected to the servo host
1196 if self.is_labstation():
1197 if not self.servo_serial:
1198 return servo_constants.SERVO_STATE_WRONG_CONFIG
1199 if self._is_servo_device_connected(
1200 'servo_v4',
1201 self.servo_serial) == False:
1202 return servo_constants.SERVO_STATE_NOT_CONNECTED
1203 elif self._is_servo_board_present_on_servo_v3() == False:
1204 return servo_constants.SERVO_STATE_NOT_CONNECTED
1205 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1206
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001207 if create_servo == self.VERIFY_FAILED:
1208 if (self.is_labstation()
1209 and self._is_main_device_not_detected_on_servo_v4()):
1210 servo_type = None
1211 if self.get_dut_host_info():
1212 servo_type = self.get_dut_host_info().get_label_value(
1213 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1214 if servo_type and 'servo_micro' in servo_type:
1215 serial = self.get_servo_micro_serial_number()
1216 logging.debug('servo_micro serial: %s', serial)
1217 if self._is_servo_device_detected('servo_micro',
1218 serial):
1219 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1220 # Device can be not detected because of DUT
1221 # TODO (otabek) update after b/159755652 and b/159754985
1222 metrics.Counter(
1223 'chromeos/autotest/repair/servo_state/needs_replacement'
1224 ).increment(fields=self._get_host_metrics_data())
1225 elif not self.is_labstation():
1226 # Here need logic to check if flex cable is connected
1227 pass
1228
Otabek Kasimov15963492020-06-23 21:10:51 -07001229 # one of the reason why servo can not initialized
1230 if ccd_testlab == self.VERIFY_FAILED:
1231 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1232
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001233 if (create_servo == self.VERIFY_FAILED
1234 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001235 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1236
1237 if pwr_button == self.VERIFY_FAILED:
1238 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1239 if lid_open == self.VERIFY_FAILED:
1240 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1241 if ec_board == self.VERIFY_FAILED:
1242 return servo_constants.SERVO_STATE_EC_BROKEN
1243
Otabek Kasimov15963492020-06-23 21:10:51 -07001244 metrics.Counter(
1245 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001246 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001247 logging.info('We do not have special state for this failure yet :)')
1248 return servo_constants.SERVO_STATE_BROKEN
1249
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001250
Richard Barnetteea3e4602016-06-10 12:36:41 -07001251def make_servo_hostname(dut_hostname):
1252 """Given a DUT's hostname, return the hostname of its servo.
1253
1254 @param dut_hostname: hostname of a DUT.
1255
1256 @return hostname of the DUT's servo.
1257
1258 """
1259 host_parts = dut_hostname.split('.')
1260 host_parts[0] = host_parts[0] + '-servo'
1261 return '.'.join(host_parts)
1262
1263
1264def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001265 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001266
1267 @param servo_hostname: hostname of the servo host.
1268
1269 @return True if it's up, False otherwise
1270 """
1271 # Technically, this duplicates the SSH ping done early in the servo
1272 # proxy initialization code. However, this ping ends in a couple
1273 # seconds when if fails, rather than the 60 seconds it takes to decide
1274 # that an SSH ping has timed out. Specifically, that timeout happens
1275 # when our servo DNS name resolves, but there is no host at that IP.
1276 logging.info('Pinging servo host at %s', servo_hostname)
1277 ping_config = ping_runner.PingConfig(
1278 servo_hostname, count=3,
1279 ignore_result=True, ignore_status=True)
1280 return ping_runner.PingRunner().ping(ping_config).received > 0
1281
1282
Richard Barnettee519dcd2016-08-15 17:37:17 -07001283def _map_afe_board_to_servo_board(afe_board):
1284 """Map a board we get from the AFE to a servo appropriate value.
1285
1286 Many boards are identical to other boards for servo's purposes.
1287 This function makes that mapping.
1288
1289 @param afe_board string board name received from AFE.
1290 @return board we expect servo to have.
1291
1292 """
1293 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1294 BOARD_MAP = {'gizmo': 'panther'}
1295 mapped_board = afe_board
1296 if afe_board in BOARD_MAP:
1297 mapped_board = BOARD_MAP[afe_board]
1298 else:
1299 for suffix in KNOWN_SUFFIXES:
1300 if afe_board.endswith(suffix):
1301 mapped_board = afe_board[0:-len(suffix)]
1302 break
1303 if mapped_board != afe_board:
1304 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1305 return mapped_board
1306
1307
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001308def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001309 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001310
Richard Barnetteea3e4602016-06-10 12:36:41 -07001311 @param dut_host Instance of `Host` on which to find the servo
1312 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001313 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001314 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001315 info = dut_host.host_info_store.get()
1316 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001317 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001318
Garry Wang11b5e872020-03-11 15:14:08 -07001319 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001320 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001321 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1322 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001323 except ValueError:
1324 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001325 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001326 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001327 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001328
1329 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001330 servo_board = _map_afe_board_to_servo_board(info.board)
1331 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001332 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001333 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1334 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001335
1336
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001337def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001338 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1339 in ['localhost', '127.0.0.1']):
1340 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001341 'SSP', 'host_container_ip', type=str, default=None)
1342
1343
Dan Shi023aae32016-05-25 11:13:01 -07001344def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001345 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001346 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001347
Richard Barnette9a26ad62016-06-10 12:03:08 -07001348 This function attempts to create and verify or repair a `ServoHost`
1349 object for a servo connected to the given `dut`, subject to various
1350 constraints imposed by the parameters:
1351 * When the `servo_args` parameter is not `None`, a servo
1352 host must be created, and must be checked with `repair()`.
1353 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1354 true:
1355 * If `try_servo_repair` is true, then create a servo host and
1356 check it with `repair()`.
1357 * Otherwise, if the servo responds to `ping` then create a
1358 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001359
Richard Barnette9a26ad62016-06-10 12:03:08 -07001360 In cases where `servo_args` was not `None`, repair failure
1361 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001362 are logged and then discarded. Note that this only happens in cases
1363 where we're called from a test (not special task) control file that
1364 has an explicit dependency on servo. In that case, we require that
1365 repair not write to `status.log`, so as to avoid polluting test
1366 results.
1367
1368 TODO(jrbarnette): The special handling for servo in test control
1369 files is a thorn in my flesh; I dearly hope to see it cut out before
1370 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001371
1372 Parameters for a servo host consist of a host name, port number, and
1373 DUT board, and are determined from one of these sources, in order of
1374 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001375 * Servo attributes from the `dut` parameter take precedence over
1376 all other sources of information.
1377 * If a DNS entry for the servo based on the DUT hostname exists in
1378 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001379 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001380 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001381 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001382
1383 @param dut An instance of `Host` from which to take
1384 servo parameters (if available).
1385 @param servo_args A dictionary with servo parameters to use if
1386 they can't be found from `dut`. If this
1387 argument is supplied, unrepaired exceptions
1388 from `verify()` will be passed back to the
1389 caller.
1390 @param try_lab_servo If not true, servo host creation will be
1391 skipped unless otherwise required by the
1392 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001393 @param try_servo_repair If true, check a servo host with
1394 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -08001395
1396 @returns: A ServoHost object or None. See comments above.
1397
1398 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001399 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001400 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001401 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001402 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001403 if utils.in_moblab_ssp():
1404 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001405 logging.debug(
1406 'Overriding provided servo_args (%s) with arguments'
1407 ' determined from the host (%s)',
1408 servo_args,
1409 servo_args_override,
1410 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001411 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001412
Richard Barnetteea3e4602016-06-10 12:36:41 -07001413 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001414 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001415 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001416 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001417 else:
1418 # For regular test case which not required the servo
1419 return None, None
1420
Garry Wang11b5e872020-03-11 15:14:08 -07001421 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1422 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001423 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1424 logging.debug(
1425 'Servo connection info missed hostname: %s , port: %s',
1426 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001427 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001428 if not is_servo_host_information_valid(servo_hostname, servo_port):
1429 logging.debug(
1430 'Servo connection info is incorrect hostname: %s , port: %s',
1431 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001432 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001433 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001434 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001435 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001436 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001437
Garry Wangebc015b2019-06-06 17:45:06 -07001438 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001439
1440 # Reset servo if the servo is locked, as we check if the servohost is up,
1441 # if the servohost is labstation and if the servohost is in lab inside the
1442 # locking logic. Also check try_servo_repair to make sure we only do this
1443 # in AdminRepair tasks.
1444 if newhost._is_locked and try_servo_repair:
1445 newhost.reset_servo()
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001446 if dut:
1447 newhost.set_dut_hostname(dut.hostname)
Garry Wangffbd2162020-04-17 16:13:48 -07001448
Garry Wangcdd27b22020-01-13 14:59:11 -08001449 try:
1450 newhost.restart_servod(quick_startup=True)
1451 except error.AutoservSSHTimeout:
1452 logging.warning("Restart servod failed due ssh connection "
1453 "to servohost timed out. This error is forgiven"
1454 " here, we will retry in servo repair process.")
1455 except error.AutoservRunError as e:
1456 logging.warning("Restart servod failed due to:\n%s\n"
1457 "This error is forgiven here, we will retry"
1458 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001459
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001460 # TODO(gregorynisbet): Clean all of this up.
1461 logging.debug('create_servo_host: attempt to set info store on '
1462 'servo host')
1463 try:
1464 if dut_host_info is None:
1465 logging.debug('create_servo_host: dut_host_info is '
1466 'None, skipping')
1467 else:
1468 newhost.set_dut_host_info(dut_host_info)
1469 logging.debug('create_servo_host: successfully set info '
1470 'store')
1471 except Exception:
1472 logging.error("create_servo_host: (%s)", traceback.format_exc())
1473
Richard Barnette9a26ad62016-06-10 12:03:08 -07001474 # Note that the logic of repair() includes everything done
1475 # by verify(). It's sufficient to call one or the other;
1476 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001477 if servo_dependency:
1478 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001479 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001480
1481 if try_servo_repair:
1482 try:
1483 newhost.repair()
1484 except Exception:
1485 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001486 else:
1487 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001488 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001489 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001490 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001491 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001492
1493
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001494def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001495 if hostname is None or len(hostname.strip()) == 0:
1496 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001497 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001498 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001499 if not type(port) is int:
1500 try:
1501 int(port)
1502 except ValueError:
1503 return False
1504
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001505 return True
1506
1507
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001508def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001509 """Check if provided servo attributes are valid.
1510
1511 @param hostname Hostname of the servohost.
1512 @param port servo port number.
1513
1514 @returns: A bool value to indicate if provided servo attribute valid.
1515 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001516 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001517 return False
1518 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001519 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001520 if port_int < 1 or port_int > 65000:
1521 return False
1522 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001523 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001524 return False
1525 return True