blob: 07f42e819f6aa2a04991f422b21a3b67e2e535a0 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070019import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070020
21from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070022from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070024from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070025from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070027from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000028from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070029from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070030from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070031from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070032from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070033from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080034
Otabek Kasimov15963492020-06-23 21:10:51 -070035try:
36 from chromite.lib import metrics
37except ImportError:
38 metrics = utils.metrics_mock
39
Dan Shi3b2adf62015-09-02 17:46:54 -070040_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070041
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070042
Garry Wangebc015b2019-06-06 17:45:06 -070043class ServoHost(base_servohost.BaseServoHost):
44 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070045 that with a servo instance for a specific port.
46
47 @type _servo: servo.Servo | None
48 """
Fang Deng5d518f42013-08-02 14:04:32 -070049
Raul E Rangel52ca2e82018-07-03 14:10:14 -060050 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070051
Dan Shie5b3c512014-08-21 12:12:09 -070052 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070053 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070054
xixuan6cf6d2f2016-01-29 15:29:00 -080055 # Ready test function
56 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070057
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080058 # Directory prefix on the servo host where the servod logs are stored.
59 SERVOD_LOG_PREFIX = '/var/log/servod'
60
61 # Exit code to use when symlinks for servod logs are not found.
62 NO_SYMLINKS_CODE = 9
63
64 # Directory in the job's results directory to dump the logs into.
65 LOG_DIR = 'servod'
66
67 # Prefix for joint loglevel files in the logs.
68 JOINT_LOG_PREFIX = 'log'
69
70 # Regex group to extract timestamp from logfile name.
71 TS_GROUP = 'ts'
72
73 # This regex is used to extract the timestamp from servod logs.
74 # files always start with log.
75 TS_RE = (r'log.'
76 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
77 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
78 # The loglevel is optional depending on labstation version.
79 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
80 TS_EXTRACTOR = re.compile(TS_RE)
81
82 # Regex group to extract MCU name from logline in servod logs.
83 MCU_GROUP = 'mcu'
84
85 # Regex group to extract logline from MCU logline in servod logs.
86 LINE_GROUP = 'line'
87
88 # This regex is used to extract the mcu and the line content from an
89 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
90 # Here is an example log-line:
91 #
92 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
93 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
94 #
95 # Here is conceptually how they are formatted:
96 #
97 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
98 # <output>
99 #
100 # The log format starts with a timestamp
101 MCU_RE = (r'[\d\-]+ [\d:,]+ '
102 # The mcu that is logging this is next.
103 r'- (?P<%s>\w+) - '
104 # Next, we have more log outputs before the actual line.
105 # Information about the file line, logging function etc.
106 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
107 # NOTE: if the log format changes, this regex needs to be
108 # adjusted.
109 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
110 # Lastly, we get the MCU's console line.
111 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
112 MCU_EXTRACTOR = re.compile(MCU_RE)
113
114 # Suffix to identify compressed logfiles.
115 COMPRESSION_SUFFIX = '.tbz2'
116
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700117 # A suffix to mark servod log directories that came from instance that
118 # ran during this servo_host, but are not the last one running e.g. when
119 # an instance (on purpose, or due to a bug) restarted in the middle of the
120 # run.
121 OLD_LOG_SUFFIX = 'old'
122
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700123 # Mapping servo board with their vid-pid
124 SERVO_VID_PID = {
125 'servo_v4':'18d1:501b',
126 'ccd_cr50':'18d1:5014',
127 'servo_micro':'18d1:501a',
128 'servo_v3':['18d1:5004', '0403:6014'],
129 }
130
Otabek Kasimov15963492020-06-23 21:10:51 -0700131 # States of verifiers
132 # True - verifier run and passed
133 # False - verifier run and failed
134 # None - verifier did not run or dependency failed
135 VERIFY_SUCCESS = True
136 VERIFY_FAILED = False
137 VERIFY_NOT_RUN = None
138
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800139 def _init_attributes(self):
140 self._servo_state = None
141 self.servo_port = None
142 self.servo_board = None
143 self.servo_model = None
144 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700145 # The flag that indicate if a servo is connected to a smart usbhub.
146 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
147 # get replaced.
148 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800149 self._servo = None
150 self._servod_server_proxy = None
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700151 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800152 # Flag to make sure that multiple calls to close do not result in the
153 # logic executing multiple times.
154 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700155
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700156 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700157 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700158 servo_model=None, servo_serial=None, is_in_lab=None,
159 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700160 """Initialize a ServoHost instance.
161
162 A ServoHost instance represents a host that controls a servo.
163
164 @param servo_host: Name of the host where the servod process
165 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600166 @param servo_port: Port the servod process is listening on. Defaults
167 to the SERVOD_PORT environment variable if set,
168 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700169 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700170 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800171 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
172 to None, for which utils.host_is_in_lab_zone will be
173 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700174
175 """
176 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700177 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800178 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700179 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700180 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700181 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700182 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800183
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800184 # The location of the log files on the servo host for this instance.
185 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
186 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700187 # Path of the servo host lock file.
188 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
189 + self.LOCK_FILE_POSTFIX)
190 # File path to declare a reboot request.
191 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
192 + self.REBOOT_FILE_POSTFIX)
193
194 # Lock the servo host if it's an in-lab labstation to prevent other
195 # task to reboot it until current task completes. We also wait and
196 # make sure the labstation is up here, in the case of the labstation is
197 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700198 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700199 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
200 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700201 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700202
Richard Barnette9a26ad62016-06-10 12:03:08 -0700203 self._repair_strategy = (
204 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700205
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700206 def __str__(self):
207 return "<%s '%s:%s'>" % (
208 type(self).__name__, self.hostname, self.servo_port)
209
Richard Barnette9a26ad62016-06-10 12:03:08 -0700210 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700211 """ Initialize and setup servo for later use.
212 """
213 self.initilize_servo()
214 self.initialize_dut_for_servo()
215
216
217 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700218 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700219
220 Initializes `self._servo` and then verifies that all network
221 connections are working. This will create an ssh tunnel if
222 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700223 """
224 self._servo = servo.Servo(servo_host=self,
225 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700226
Garry Wang8c8dc972020-06-09 13:41:51 -0700227
228 def initialize_dut_for_servo(self):
229 """This method will do some setup for dut control, e.g. setup
230 main servo_v4 device, and also testing the connection between servo
231 and DUT. As a side effect of testing the connection, all signals on
232 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700233 set to the neutral (off) position.
234 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700235 if not self._servo:
236 raise hosts.AutoservVerifyError('Servo object needs to be'
237 ' initialized before initialize'
238 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700239 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700240 self._servo.initialize_dut,
241 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700242 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700243 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
244 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700245
246
247 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700248 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700249
250 If we've previously successfully connected to our servo,
251 disconnect any established ssh tunnel, and set `self._servo`
252 back to `None`.
253 """
254 if self._servo:
255 # N.B. This call is safe even without a tunnel:
256 # rpc_server_tracker.disconnect() silently ignores
257 # unknown ports.
258 self.rpc_server_tracker.disconnect(self.servo_port)
259 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700260
Garry Wangc1288cf2019-12-17 14:58:00 -0800261
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800262 def _create_servod_server_proxy(self):
263 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700264
265 @returns: An xmlrpclib.ServerProxy that is connected to the servod
266 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700267 """
Garry Wang11b5e872020-03-11 15:14:08 -0700268 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
269 and not self.is_localhost()):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700270 return self.rpc_server_tracker.xmlrpc_connect(
271 None, self.servo_port,
272 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800273 timeout_seconds=60,
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700274 request_timeout_seconds=3600,
275 server_desc=str(self))
Richard Barnette9a26ad62016-06-10 12:03:08 -0700276 else:
277 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
278 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700279
280
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800281 def get_servod_server_proxy(self):
282 """Return a cached proxy if exists; otherwise, create a new one.
283
284 @returns: An xmlrpclib.ServerProxy that is connected to the servod
285 server on the host.
286 """
287 # Single-threaded execution, no race
288 if self._servod_server_proxy is None:
289 self._servod_server_proxy = self._create_servod_server_proxy()
290 return self._servod_server_proxy
291
292
Richard Barnette1edbb162016-11-01 11:47:50 -0700293 def verify(self, silent=False):
294 """Update the servo host and verify it's in a good state.
295
296 @param silent If true, suppress logging in `status.log`.
297 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700298 message = 'Beginning verify for servo host %s port %s serial %s'
299 message %= (self.hostname, self.servo_port, self.servo_serial)
300 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700301 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700302 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700303 self._servo_state = servo_constants.SERVO_STATE_WORKING
304 self.record('INFO', None, None,
305 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700306 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700307 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700308 self._servo_state = self.determine_servo_state()
309 self.record('INFO', None, None,
310 'ServoHost verify set servo_state as %s'
311 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700312 if self._is_critical_error(e):
313 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700314
315
Garry Wang7b0e1b72020-03-25 19:08:59 -0700316 def get_image_name_from_usbkey(self, usbkey_dev):
317 """Mount usb drive and check ChromeOS image name on it if there is
318 one. This method assumes the image_usbkey_direction is already set
319 to servo side.
320
321 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
322
323 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
324 or empty string if no test image detected, or unexpected
325 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700326 """
Garry Wang70e5d062020-04-03 18:01:05 -0700327 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700328 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
329 unmount_cmd = 'umount %s' % usb_mount_path
330 # ChromeOS root fs is in /dev/sdx3
331 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
332 # Unmount if there is an existing stale mount.
333 self.run(unmount_cmd, ignore_status=True)
334 # Create if the mount point is not existing.
335 self.run('mkdir -p %s' % usb_mount_path)
336 try:
337 # Attempt to mount the usb drive.
338 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700339 if mount_result.exit_status != 0:
340 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700341 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700342
343 release_content = self.run(
344 'cat %s/etc/lsb-release' % usb_mount_path,
345 ignore_status=True).stdout.strip()
346
347 if not re.search(r'RELEASE_TRACK=.*test', release_content):
348 logging.info('The image on usbkey is not a test image')
349 return ''
350
351 return lsbrelease_utils.get_chromeos_release_builder_path(
352 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700353 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700354 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700355 self.run(unmount_cmd, ignore_status=True)
356
357
Garry Wang70e5d062020-04-03 18:01:05 -0700358 def _probe_and_validate_usb_dev(self):
359 """This method probe the usb dev path by talking to servo, and then
360 validate the dev path is valid block device to servohost.
361 Possible output:
362 1. Encounter error during probe usb dev, returns empty string.
363 2. probe usb dev completed without error but cannot find usb dev,
364 raise AutoservRepairError.
365 3. probe usb dev find a usb dev path, but failed validation in this
366 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700367
Garry Wang70e5d062020-04-03 18:01:05 -0700368 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
369 if unexpected error occurred during probe.
370 @raises: AutoservRepairError if servo couldn't probe the usb dev path
371 (servo.probe_host_usb_dev() returns empty string), or the dev path is
372 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700373 """
374 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700375 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700376 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700377 except Exception as e:
378 # We don't want any unexpected or transient servo communicating
379 # failure block usb repair, so capture all errors here.
380 logging.error(e, exc_info=True)
381 logging.error('Unexpected error occurred on get usbkey dev path,'
382 ' skipping usbkey validation.')
383 return ''
384
Garry Wang70e5d062020-04-03 18:01:05 -0700385 if usb_dev:
386 # probe_host_usb_dev() sometimes return stale record,
387 # so we need to make sure the path exists in fdisk.
388 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700389 try:
390 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
391 if resp.exit_status == 0:
392 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700393
Garry Wang11441182020-06-16 18:34:14 -0700394 logging.error('%s is reported from "image_usbkey_dev" control'
395 ' but not detected by fdisk!', usb_dev)
396 except error.AutoservRunError as e:
397 if 'Timeout encountered' in str(e):
398 logging.warning('Timeout encountered during fdisk run,'
399 ' skipping usbkey validation.')
400 return ''
401 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700402
403 raise hosts.AutoservRepairError(
404 'No usbkey detected on servo, the usbkey may be either missing'
405 ' or broken. Please replace usbkey on the servo and retry.',
406 'missing usbkey')
407
408
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700409 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700410 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700411 if self.servo_board:
412 try:
413 frm_config = config.Config(self.servo_board, self.servo_model)
414 return frm_config.chrome_ec
415 except Exception as e:
416 logging.error('Unexpected error when read from firmware'
417 ' configs; %s', str(e))
418 return False
419
Garry Wang70e5d062020-04-03 18:01:05 -0700420 def validate_image_usbkey(self):
421 """This method first validate if there is a recover usbkey on servo
422 that accessible to servohost, and second check if a ChromeOS image is
423 already on the usb drive and return the image_name so we can avoid
424 unnecessary download and flash to the recover usbkey on servo.
425
426 Please note that, there is special error handling logic here:
427 1. If unexpected error happens, we return empty string. So repair
428 actions will not get blocked.
429 2. If no working usbkey present on servo, but no errors, we'll raise
430 AutoservRepairError here.
431
432 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
433 or empty string if no test image detected, or unexpected
434 error occurred.
435 @raises: AutoservRepairError if the usbkey is not detected on servo.
436 """
437 usb_dev = self._probe_and_validate_usb_dev()
438 if usb_dev:
439 return self.get_image_name_from_usbkey(usb_dev)
440 else:
441 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700442
443
Richard Barnette1edbb162016-11-01 11:47:50 -0700444 def repair(self, silent=False):
445 """Attempt to repair servo host.
446
447 @param silent If true, suppress logging in `status.log`.
448 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700449 message = 'Beginning repair for servo host %s port %s serial %s'
450 message %= (self.hostname, self.servo_port, self.servo_serial)
451 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700452 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700453 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700454 self._servo_state = servo_constants.SERVO_STATE_WORKING
455 self.record('INFO', None, None,
456 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700457 # If target is a labstation then try to withdraw any existing
458 # reboot request created by this servo because it passed repair.
459 if self.is_labstation():
460 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700461 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700462 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700463 self._servo_state = self.determine_servo_state()
464 self.record('INFO', None, None,
465 'ServoHost repair set servo_state as %s'
466 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700467 if self._is_critical_error(e):
468 self.disconnect_servo()
469 self.stop_servod()
470 raise
471
472
473 def _is_critical_error(self, error):
474 if (isinstance(error, hosts.AutoservVerifyDependencyError)
475 and not error.is_critical()):
476 logging.warning('Non-critical verify failure(s) detected during'
477 ' verify/repair servo, servo connection will'
478 ' still up but may not fully functional.'
479 ' Some repair actions and servo depended'
480 ' tests may not run.')
481 return False
482 logging.info('Critical verify failure(s) detected during repair/verify'
483 ' servo. Disconnecting servo and stop servod, all repair '
484 'action and tests that depends on servo will not run.')
485 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700486
487
Dan Shi4d478522014-02-14 13:46:32 -0800488 def get_servo(self):
489 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700490
Dan Shi4d478522014-02-14 13:46:32 -0800491 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700492 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700493 """
Dan Shi4d478522014-02-14 13:46:32 -0800494 return self._servo
495
496
Garry Wang79e9af62019-06-12 15:19:19 -0700497 def request_reboot(self):
498 """Request servohost to be rebooted when it's safe to by touch a file.
499 """
500 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700501 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700502 self.run('touch %s' % self._reboot_file, ignore_status=True)
503
504
Garry Wang464ff1e2019-07-18 17:20:34 -0700505 def withdraw_reboot_request(self):
506 """Withdraw a servohost reboot request if exists by remove the flag
507 file.
508 """
509 logging.debug('Withdrawing request to reboot servohost %s that created'
510 ' by servo with port # %s if exists.',
511 self.hostname, self.servo_port)
512 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
513
514
Garry Wangc1288cf2019-12-17 14:58:00 -0800515 def start_servod(self, quick_startup=False):
516 """Start the servod process on servohost.
517 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800518 # Skip if running on the localhost.(crbug.com/1038168)
519 if self.is_localhost():
520 logging.debug("Servohost is a localhost, skipping start servod.")
521 return
522
523 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800524 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800525 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800526 if self.servo_model:
527 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800528 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800529 logging.warning('Board for DUT is unknown; starting servod'
530 ' assuming a pre-configured board.')
531
532 cmd += ' PORT=%d' % self.servo_port
533 if self.servo_serial:
534 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800535
536 # Start servod with dual_v4 if the DUT/servo from designated pools.
537 dut_host_info = self.get_dut_host_info()
538 if dut_host_info:
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700539 # DUAL_V4: servo setup includes servo_micro and ccd_cr50
540 # connection to the DUT
541 is_dual_setup = False
Garry Wang11b5e872020-03-11 15:14:08 -0700542 if bool(dut_host_info.pools &
543 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800544 logging.debug('The DUT is detected in following designated'
545 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700546 servo_constants.POOLS_SUPPORT_DUAL_V4)
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700547 is_dual_setup = True
548 elif dut_host_info.attributes.get('servo_setup') == 'DUAL_V4':
549 logging.debug('The DUT servo setup specified in config as '
550 ' "DUAL_V4"')
551 is_dual_setup = True
552 if is_dual_setup:
Garry Wangd7367482020-02-27 13:52:40 -0800553 cmd += ' DUAL_V4=1'
554
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800555 # Remove the symbolic links from the logs. This helps ensure that
556 # a failed servod instantiation does not cause us to grab old logs
557 # by mistake.
558 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800559 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800560
561 # There's a lag between when `start servod` completes and when
562 # the _ServodConnectionVerifier trigger can actually succeed.
563 # The call to time.sleep() below gives time to make sure that
564 # the trigger won't fail after we return.
565
566 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
567 # But in the rare case all servo on a labstation are in heavy use they
568 # may take ~30 seconds. So the timeout value will double these value,
569 # and we'll try quick start up when first time initialize servohost,
570 # and use standard start up timeout in repair.
571 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700572 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800573 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700574 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800575 logging.debug('Wait %s seconds for servod process fully up.', timeout)
576 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700577 # Cache the initial instance timestamp to check against servod restarts
578 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800579
580
581 def stop_servod(self):
582 """Stop the servod process on servohost.
583 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800584 # Skip if running on the localhost.(crbug.com/1038168)
585 if self.is_localhost():
586 logging.debug("Servohost is a localhost, skipping stop servod.")
587 return
588
Garry Wangc1288cf2019-12-17 14:58:00 -0800589 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800590 self.run('stop servod PORT=%d' % self.servo_port,
591 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800592 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700593 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
594 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800595
596
597 def restart_servod(self, quick_startup=False):
598 """Restart the servod process on servohost.
599 """
600 self.stop_servod()
601 self.start_servod(quick_startup)
602
Garry Wang11b5e872020-03-11 15:14:08 -0700603
Garry Wangffbd2162020-04-17 16:13:48 -0700604 def _process_servodtool_error(self, response):
605 """Helper function to handle non-zero servodtool response.
606 """
607 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700608 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700609 logging.error('The servo is not plugged on a usb hub that supports'
610 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700611 # change the flag so we can update this label in later process.
612 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700613 return
614
615 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
616 self.servo_serial, response.stdout):
617 logging.error('No servo with serial %s found!', self.servo_serial)
618 return
619
620 logging.error('Unexpected error occurred from usbhub control, please'
621 ' file a bug and inform chrome-fleet-software@ team!')
622
623
624 def _get_servo_usb_devnum(self):
625 """Helper function to collect current usb devnum of servo.
626 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700627 # TODO remove try-except when fix crbug.com/1087964
628 try:
629 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
630 resp = self.run(cmd, ignore_status=True, timeout=30)
631 except Exception as e:
632 # Here we catch only timeout errors.
633 # Other errors is filtered by ignore_status=True
634 logging.debug('Attempt to get servo usb-path failed due to '
635 'timeout; %s', e)
636 return ''
637
Garry Wangffbd2162020-04-17 16:13:48 -0700638 if resp.exit_status != 0:
639 self._process_servodtool_error(resp)
640 return ''
641 usb_path = resp.stdout.strip()
642 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
643
644 resp = self.run('cat %s/devnum' % usb_path,
645 ignore_status=True)
646 if resp.exit_status != 0:
647 self._process_servodtool_error(resp)
648 return ''
649 return resp.stdout.strip()
650
Garry Wang358aad42020-08-02 20:56:04 -0700651 def reboot_servo_v3_on_need(self):
652 """Check and reboot servo_v3 based on below conditions.
653 1. If there is an update pending on reboot.
654 2. Servo_v3 has been up for more than 96 hours.
655 """
656 if self.get_board() != 'beaglebone_servo':
657 logging.info('Servo reboot is only applicable for servo V3.')
Otabek Kasimove6df8102020-07-21 20:15:25 -0700658 return
659
Garry Wang358aad42020-08-02 20:56:04 -0700660 update_pending_reboot = (self._check_update_status() ==
661 self.UPDATE_STATE.PENDING_REBOOT)
662 uptime_hours = float(self.check_uptime())/3600
663 logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
664 long_up_time = uptime_hours > 96
665
666 # Skip reboot if neither condition are met.
667 if not (update_pending_reboot or long_up_time):
Otabek Kasimove6df8102020-07-21 20:15:25 -0700668 return
669
Garry Wang358aad42020-08-02 20:56:04 -0700670 if update_pending_reboot:
671 message = 'Starting reboot servo_v3 because an update is pending.'
672 reboot_method = self._post_update_reboot
673 elif long_up_time:
674 message = 'Starting reboot servo_v3 because uptime > 96 hours.'
675 reboot_method = self._servo_host_reboot
676 self.record('INFO', None, None, message)
677 logging.info(message)
Otabek Kasimove6df8102020-07-21 20:15:25 -0700678 try:
Garry Wang358aad42020-08-02 20:56:04 -0700679 reboot_method()
Otabek Kasimove6df8102020-07-21 20:15:25 -0700680 message = 'Servo_v3 reboot completed successfully.'
681 except Exception as e:
682 logging.debug("Fail to reboot servo_v3; %s", e)
683 message = ('Servo_v3 reboot failed, please check debug log '
684 'for details.')
685 logging.info(message)
686 self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700687
688 def _reset_servo(self):
689 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700690 # TODO remove try-except when fix crbug.com/1087964
691 try:
692 resp = self.run('servodtool device -s %s power-cycle' %
693 self.servo_serial, ignore_status=True,
694 timeout=30)
695 if resp.exit_status != 0:
696 self._process_servodtool_error(resp)
697 return False
698 except Exception as e:
699 # Here we catch only timeout errors.
700 # Other errors is filtered by ignore_status=True
701 logging.debug('Attempt to reset servo failed due to timeout;'
702 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700703 return False
704
705 logging.debug('Wait %s seconds for servo to come back from reset.',
706 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
707 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700708 # change the flag so we can update this label in later process.
709 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700710 return True
711
712
713 def reset_servo(self):
714 """Reset(power-cycle) the servo via smart usbhub.
715 """
716 if not self.is_labstation():
717 logging.info('Servo reset is not applicable to servo_v3.')
718 return
719
720 pre_reset_devnum = self._get_servo_usb_devnum()
721 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
722 result = self._reset_servo()
723 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700724 message = ('Failed to reset servo with serial: %s. (Please ignore'
725 ' this error if the DUT is not connected to a smart'
726 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700727 logging.warning(message)
728 self.record('INFO', None, None, message)
729 return
730
731 post_reset_devnum = self._get_servo_usb_devnum()
732 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
733 if not (pre_reset_devnum and post_reset_devnum):
734 message = ('Servo reset completed but unable to verify'
735 ' devnum change!')
736 elif pre_reset_devnum != post_reset_devnum:
737 message = ('Reset servo with serial %s completed successfully!'
738 % self.servo_serial)
739 else:
740 message = 'Servo reset completed but devnum is still not changed!'
741 logging.info(message)
742 self.record('INFO', None, None, message)
743
744
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800745 def _extract_compressed_logs(self, logdir, relevant_files):
746 """Decompress servod logs in |logdir|.
747
748 @param logdir: directory containing compressed servod logs.
749 @param relevant_files: list of files in |logdir| to consider.
750
751 @returns: tuple, (tarfiles, files) where
752 tarfiles: list of the compressed filenames that have been
753 extracted and deleted
754 files: list of the uncompressed files that were generated
755 """
756 # For all tar-files, first extract them to the directory, and
757 # then let the common flow handle them.
758 tarfiles = [cf for cf in relevant_files if
759 cf.endswith(self.COMPRESSION_SUFFIX)]
760 files = []
761 for f in tarfiles:
762 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
763 with tarfile.open(f) as tf:
764 # Each tarfile has only one member, as
765 # that's the compressed log.
766 member = tf.members[0]
767 # Manipulate so that it only extracts the basename, and not
768 # the directories etc.
769 member.name = norm_name
770 files.append(os.path.join(logdir, member.name))
771 tf.extract(member, logdir)
772 # File has been extracted: remove the compressed file.
773 os.remove(f)
774 return tarfiles, files
775
776 def _extract_mcu_logs(self, log_subdir):
777 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
778
779 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
780 lines from the logs to generate invidiual console logs e.g. after
781 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
782 those MCUs had any console input/output.
783
784 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
785 """
786 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
787 # files
788 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
789 if not os.path.exists(mcu_lines_file):
790 logging.info('No DEBUG logs found to extract MCU logs from.')
791 return
792 mcu_files = {}
793 mcu_file_template = '%s.txt'
794 with open(mcu_lines_file, 'r') as f:
795 for line in f:
796 match = self.MCU_EXTRACTOR.match(line)
797 if match:
798 mcu = match.group(self.MCU_GROUP).lower()
799 line = match.group(self.LINE_GROUP)
800 if mcu not in mcu_files:
801 mcu_file = os.path.join(log_subdir,
802 mcu_file_template % mcu)
803 mcu_files[mcu] = open(mcu_file, 'a')
804 fd = mcu_files[mcu]
805 fd.write(line + '\n')
806 for f in mcu_files:
807 mcu_files[f].close()
808
809
810 def remove_latest_log_symlinks(self):
811 """Remove the conveninence symlinks 'latest' servod logs."""
812 symlink_wildcard = '%s/latest*' % self.remote_log_dir
813 cmd = 'rm ' + symlink_wildcard
814 self.run(cmd, stderr_tee=None, ignore_status=True)
815
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700816 def probe_servod_restart(self, instance_ts, outdir):
817 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800818
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700819 If since the last time this host called start_servod() servod crashed
820 and restarted, this helper finds those logs as well, and stores them
821 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000822
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700823 It also issues a panicinfo command to servo devices after the restart
824 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800825
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700826 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800827 @param outdir: directory to create a subdirectory into to place the
828 servod logs into.
829 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700830 if self._initial_instance_ts is None:
831 logging.info('No log timestamp grabbed successfully on servod '
832 'startup. Cannot check device restarts. Ignoring.')
833 return
834 if instance_ts == self._initial_instance_ts:
835 logging.debug('Servod appears to have run without restarting')
836 return
837 # Servod seems to have restarted (at least once). |_initial_instance_ts|
838 # is the first timestamp, and instance_ts is the current timestamp. Find
839 # all timestamps in between them, and grab the logs for each.
840 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
841 instance_ts)
842 logging.info('Servod has restarted %d times between the start and the '
843 'end of this servo_host.', len(tss))
844 logging.info('This might be an issue. Will extract all logs from each '
845 'instance.')
846 logging.info('Logs that are not the currently running (about to turn '
847 'down) instance are maked with a .%s in their folder.',
848 self.OLD_LOG_SUFFIX)
849 for ts in tss:
850 self.get_instance_logs(ts, outdir, old=True)
851 # Lastly, servod has restarted due to a potential issue. Try to get
852 # panic information from servo micro and servo v4 for the current logs.
853 # This can only happen if the |_servo| attribute is initialized.
854 if self._servo:
855 for mcu in ['servo_micro', 'servo_v4']:
856 ctrl = '%s_uart_cmd' % mcu
857 if self._servo.has_control(ctrl):
858 logging.info('Trying to retrieve %r panicinfo into logs',
859 mcu)
860 try:
861 self._servo.set_nocheck(ctrl, 'panicinfo')
862 except error.TestFail as e:
863 logging.error('Failed to generate panicinfo for %r '
864 'logs. %s', mcu, str(e))
865
866 def _find_instance_timestamps_between(self, start_ts, end_ts):
867 """Find all log timestamps between [start_ts, end_ts).
868
869 @param start_ts: str, earliest log timestamp of interest
870 @param end_ts: str, latest log timestamp of interest
871
872 @returns: list, all timestamps between start_ts and end_ts, end_ts
873 exclusive, on the servo_host. An empty list on errors
874 """
875 # Simply get all timestamp, and then sort and remove
876 cmd = 'ls %s' % self.remote_log_dir
877 res = self.run(cmd, stderr_tee=None, ignore_status=True)
878 if res.exit_status != 0:
879 # Here we failed to find anything.
880 logging.info('Failed to find remote servod logs. Ignoring.')
881 return []
882 logfiles = res.stdout.strip().split()
883 timestamps = set()
884 for logfile in logfiles:
885 ts_match = self.TS_EXTRACTOR.match(logfile)
886 if not ts_match:
887 # Simply ignore files that fail the check. It might be the
888 # 'latest' symlinks or random files.
889 continue
890 timestamps.add(ts_match.group(self.TS_GROUP))
891 # At this point we have all unique timestamps.
892 timestamps = sorted(timestamps)
893 for ts in [start_ts, end_ts]:
894 if ts not in timestamps:
895 logging.error('Timestamp %r not in servod logs. Cannot query '
896 'for timestamps in between %r and %r', ts,
897 start_ts, end_ts)
898 return []
899 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
900
901 def get_instance_logs_ts(self):
902 """Retrieve the currently running servod instance's log timestamp
903
904 @returns: str, timestamp for current instance, or None on failure
905 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800906 # First, extract the timestamp. This cmd gives the real filename of
907 # the latest aka current log file.
908 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
909 'then realpath %(dir)s/latest.DEBUG;'
910 'elif [ -f %(dir)s/latest ];'
911 'then realpath %(dir)s/latest;'
912 'else exit %(code)d;'
913 'fi' % {'dir': self.remote_log_dir,
914 'code': self.NO_SYMLINKS_CODE})
915 res = self.run(cmd, stderr_tee=None, ignore_status=True)
916 if res.exit_status != 0:
917 if res.exit_status == self.NO_SYMLINKS_CODE:
918 logging.warning('servod log latest symlinks not found. '
919 'This is likely due to an error starting up '
920 'servod. Ignoring..')
921 else:
922 logging.warning('Failed to find servod logs on servo host.')
923 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700924 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800925 fname = os.path.basename(res.stdout.strip())
926 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800927 ts_match = self.TS_EXTRACTOR.match(fname)
928 if not ts_match:
929 logging.warning('Failed to extract timestamp from servod log file '
930 '%r. Skipping. The servo host is using outdated '
931 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700932 return None
933 return ts_match.group(self.TS_GROUP)
934
935 def get_instance_logs(self, instance_ts, outdir, old=False):
936 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
937
938 This method first collects all logs on the servo_host side pertaining
939 to this servod instance (port, instatiation). It glues them together
940 into combined log.[level].txt files and extracts all available MCU
941 console I/O from the logs into individual files e.g. servo_v4.txt
942
943 All the output can be found in a directory inside |outdir| that
944 this generates based on |LOG_DIR|, the servod port, and the instance
945 timestamp on the servo_host side.
946
947 @param instance_ts: log timestamp to grab logfiles for
948 @param outdir: directory to create a subdirectory into to place the
949 servod logs into.
950 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
951 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800952 # Create the local results log dir.
953 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
954 str(self.servo_port),
955 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700956 if old:
957 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
958 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800959 os.mkdir(log_dir)
960 # Now, get all files with that timestamp.
961 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
962 instance_ts)
963 res = self.run(cmd, stderr_tee=None, ignore_status=True)
964 files = res.stdout.strip().split()
965 try:
966 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700967 if not os.listdir(log_dir):
968 logging.info('No servod logs retrieved. Ignoring, and removing '
969 '%r again.', log_dir)
970 os.rmdir(log_dir)
971 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800972 except error.AutoservRunError as e:
973 result = e.result_obj
974 if result.exit_status != 0:
975 stderr = result.stderr.strip()
976 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
977 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700978 # Remove the log_dir as nothing was added to it.
979 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800980 return
981 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
982 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
983 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
984 # Create the joint files for each loglevel. i.e log.DEBUG
985 joint_file = self.JOINT_LOG_PREFIX
986 if level_name:
987 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
988 # This helps with some online tools to avoid complaints about an
989 # unknown filetype.
990 joint_file = joint_file + '.txt'
991 joint_path = os.path.join(log_dir, joint_file)
992 files = [f for f in local_files if level_name in f]
993 if not files:
994 # TODO(crrev.com/c/1793030): remove no-level case once CL
995 # is pushed
996 continue
997 # Extract compressed logs if any.
998 compressed, extracted = self._extract_compressed_logs(log_dir,
999 files)
1000 files = list(set(files) - set(compressed))
1001 files.extend(extracted)
1002 # Need to sort. As they all share the same timestamp, and
1003 # loglevel, the index itself is sufficient. The highest index
1004 # is the oldest file, therefore we need a descending sort.
1005 def sortkey(f, level=level_name):
1006 """Custom sortkey to sort based on rotation number int."""
1007 if f.endswith(level_name): return 0
1008 return int(f.split('.')[-1])
1009
1010 files.sort(reverse=True, key=sortkey)
1011 # Just rename the first file rather than building from scratch.
1012 os.rename(files[0], joint_path)
1013 with open(joint_path, 'a') as joint_f:
1014 for logfile in files[1:]:
1015 # Transfer the file to the joint file line by line.
1016 with open(logfile, 'r') as log_f:
1017 for line in log_f:
1018 joint_f.write(line)
1019 # File has been written over. Delete safely.
1020 os.remove(logfile)
1021 # Need to remove all files form |local_files| so we don't
1022 # analyze them again.
1023 local_files = list(set(local_files) - set(files) - set(compressed))
1024 # Lastly, extract MCU logs from the joint logs.
1025 self._extract_mcu_logs(log_dir)
1026
Garry Wang79e9af62019-06-12 15:19:19 -07001027 def _lock(self):
1028 """lock servohost by touching a file.
1029 """
1030 logging.debug('Locking servohost %s by touching %s file',
1031 self.hostname, self._lock_file)
1032 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001033 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001034
1035
1036 def _unlock(self):
1037 """Unlock servohost by removing the lock file.
1038 """
1039 logging.debug('Unlocking servohost by removing %s file',
1040 self._lock_file)
1041 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001042 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001043
1044
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001045 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001046 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001047 # NOTE: throughout this method there are multiple attempts to catch
1048 # all errors. This is WAI as log grabbing should not fail tests.
1049 # However, the goal is to catch and handle/process all errors, thus
1050 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001051 if self._closed:
1052 logging.debug('ServoHost is already closed.')
1053 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001054 instance_ts = self.get_instance_logs_ts()
1055 # TODO(crbug.com/1011516): once enabled, remove the check against
1056 # localhost and instead check against log-rotiation enablement.
1057 logs_available = (instance_ts is not None and
1058 self.job and
1059 not self.is_localhost())
1060 if logs_available:
1061 # Probe whether there was a servod restart, and grab those old
1062 # logs as well.
1063 try:
1064 self.probe_servod_restart(instance_ts, self.job.resultdir)
1065 except (error.AutoservRunError, error.TestFail) as e:
1066 logging.info('Failed to grab servo logs due to: %s. '
1067 'This error is forgiven.', str(e))
1068 except Exception as e:
1069 logging.error('Unexpected error probing for old logs. %s. '
1070 'Forgiven. Please file a bug and fix or catch '
1071 'in log probing function', str(e),
1072 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001073 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001074 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001075 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001076 self._servo.close(outdir)
1077
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001078 if logs_available:
1079 # Grab current (not old like above) logs after the servo instance
1080 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001081 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001082 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001083 except error.AutoservRunError as e:
1084 logging.info('Failed to grab servo logs due to: %s. '
1085 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001086 except Exception as e:
1087 logging.error('Unexpected error grabbing servod logs. %s. '
1088 'Forgiven. Please file a bug and fix or catch '
1089 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001090
Garry Wang7c00b0f2019-06-25 17:28:17 -07001091 if self._is_locked:
1092 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001093 try:
1094 self._unlock()
1095 except error.AutoservSSHTimeout:
1096 logging.error('Unlock servohost failed due to ssh timeout.'
1097 ' It may caused by servohost went down during'
1098 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001099 # We want always stop servod after task to minimum the impact of bad
1100 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001101 try:
1102 self.stop_servod()
1103 except error.AutoservRunError as e:
1104 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001105 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001106
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001107 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001108 # Mark closed.
1109 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001110
1111
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001112 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001113 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001114
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001115 def _get_host_metrics_data(self):
1116 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001117 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001118 'board': self.servo_board or ''}
1119
1120 def _is_servo_device_connected(self, servo_type, serial):
1121 """Check if device is connected to the labstation.
1122
1123 Works for all servo devices connected to the labstation.
1124 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1125
1126 @param servo_type: The type of servo device. Expecting value can be
1127 servo_v4 or servo_micro.
1128 @param serial: The serial number of the device to detect it.
1129 """
1130 vid_pid = self.SERVO_VID_PID.get(servo_type)
1131 if not vid_pid or not serial:
1132 # device cannot detected without VID/PID or serial number
1133 return False
1134 logging.debug('Started to detect %s', servo_type)
1135 try:
1136 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1137 result = self.run(cmd, ignore_status=True, timeout=30)
1138 if result.exit_status == 0 and result.stdout.strip():
1139 logging.debug('The %s is plugged in to the host.', servo_type)
1140 return True
1141 logging.debug('%s device is not detected; %s', servo_type, result)
1142 return False
1143 except Exception as e:
1144 # can be triggered by timeout issue due running the script
1145 metrics.Counter(
1146 'chromeos/autotest/repair/servo_detection/timeout'
1147 ).increment(fields=self._get_host_metrics_data())
1148 logging.error('%s device is not detected; %s', servo_type, str(e))
1149 return None
1150
1151 def _is_servo_board_present_on_servo_v3(self):
1152 """Check if servo board is detected on servo_v3"""
1153 vid_pids = self.SERVO_VID_PID['servo_v3']
1154 if not vid_pids or len(vid_pids) == 0:
1155 # device cannot detected without VID/PID
1156 return False
1157 logging.debug('Started to detect servo board on servo_v3')
1158 not_detected = 'The servo board is not detected on servo_v3'
1159 try:
1160 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1161 result = self.run(cmd, ignore_status=True, timeout=30)
1162 if result.exit_status == 0 and result.stdout.strip():
1163 logging.debug('The servo board is detected on servo_v3')
1164 return True
1165 logging.debug('%s; %s', not_detected, result)
1166 return False
1167 except Exception as e:
1168 # can be triggered by timeout issue due running the script
1169 metrics.Counter(
1170 'chromeos/autotest/repair/servo_detection/timeout'
1171 ).increment(fields=self._get_host_metrics_data())
1172 logging.error('%s; %s', not_detected, str(e))
1173 return None
1174
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001175 def _is_main_device_not_detected_on_servo_v4(self):
1176 """Check if servod cannot find main device on servo.
1177
1178 The check based on reading servod logs for servo_v4.
1179 """
1180 if not self._initial_instance_ts:
1181 # latest log not found
1182 return False
1183 logging.debug('latest log for servod created at %s',
1184 self._initial_instance_ts)
1185 try:
1186 log_created = calendar.timegm(time.strptime(
1187 self._initial_instance_ts,
1188 "%Y-%m-%d--%H-%M-%S.%f"))
1189 except ValueError as e:
1190 logging.debug('Cannot read time from log file name: %s',
1191 self._initial_instance_ts)
1192 return False
1193 min_time_created = calendar.timegm(time.gmtime())
1194 if min_time_created > log_created + 3600:
1195 # the log file is old we cannot use it
1196 logging.debug('log file was created more than hour ago, too old')
1197 return False
1198 logging.debug('latest log was created not longer then 1 hour ago')
1199
1200 # check if servod can detect main device by servo_v4
1201 message = 'ERROR - No servo micro or CCD detected for board'
1202 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1203 % (self.servo_port, self._initial_instance_ts, message))
1204 result = self.run(cmd, ignore_status=True)
1205 if result.stdout.strip():
1206 logging.info('Servod cannot detect main device on the servo; '
1207 'Can be caused by bad hardware of servo or '
1208 'issue on the DUT side.')
1209 return True
1210 logging.debug('The main device is detected')
1211 return False
1212
Otabek Kasimov15963492020-06-23 21:10:51 -07001213 def get_verify_state(self, tag):
1214 """Return the state of servo verifier.
1215
1216 @returns: bool or None
1217 """
1218 return self._repair_strategy.verifier_is_good(tag)
1219
1220 def determine_servo_state(self):
1221 """Determine servo state based on the failed verifier.
1222
1223 @returns: servo state value
1224 The state detecting based on first fail verifier or collecting of
1225 them.
1226 """
1227 ssh = self.get_verify_state('servo_ssh')
1228 disk_space = self.get_verify_state('disk_space')
1229 start_servod = self.get_verify_state('servod_job')
1230 create_servo = self.get_verify_state('servod_connection')
1231 init_servo = self.get_verify_state('servod_control')
1232 pwr_button = self.get_verify_state('pwr_button')
1233 lid_open = self.get_verify_state('lid_open')
1234 ec_board = self.get_verify_state('ec_board')
1235 ccd_testlab = self.get_verify_state('ccd_testlab')
1236
1237 if not ssh:
1238 return servo_constants.SERVO_STATE_NO_SSH
1239
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001240 if (start_servod == self.VERIFY_FAILED
1241 or create_servo == self.VERIFY_FAILED):
1242 # sometimes servo can start with out present servo
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001243 if self.is_labstation():
1244 if not self.servo_serial:
1245 return servo_constants.SERVO_STATE_WRONG_CONFIG
1246 if self._is_servo_device_connected(
1247 'servo_v4',
1248 self.servo_serial) == False:
1249 return servo_constants.SERVO_STATE_NOT_CONNECTED
1250 elif self._is_servo_board_present_on_servo_v3() == False:
1251 return servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001252
1253 if start_servod == self.VERIFY_FAILED:
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001254 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1255
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001256 if create_servo == self.VERIFY_FAILED:
1257 if (self.is_labstation()
1258 and self._is_main_device_not_detected_on_servo_v4()):
1259 servo_type = None
1260 if self.get_dut_host_info():
1261 servo_type = self.get_dut_host_info().get_label_value(
1262 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1263 if servo_type and 'servo_micro' in servo_type:
1264 serial = self.get_servo_micro_serial_number()
1265 logging.debug('servo_micro serial: %s', serial)
1266 if self._is_servo_device_detected('servo_micro',
1267 serial):
1268 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1269 # Device can be not detected because of DUT
1270 # TODO (otabek) update after b/159755652 and b/159754985
1271 metrics.Counter(
1272 'chromeos/autotest/repair/servo_state/needs_replacement'
1273 ).increment(fields=self._get_host_metrics_data())
1274 elif not self.is_labstation():
1275 # Here need logic to check if flex cable is connected
1276 pass
1277
Otabek Kasimov15963492020-06-23 21:10:51 -07001278 # one of the reason why servo can not initialized
1279 if ccd_testlab == self.VERIFY_FAILED:
1280 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1281
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001282 if (create_servo == self.VERIFY_FAILED
1283 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001284 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1285
1286 if pwr_button == self.VERIFY_FAILED:
1287 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1288 if lid_open == self.VERIFY_FAILED:
1289 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1290 if ec_board == self.VERIFY_FAILED:
1291 return servo_constants.SERVO_STATE_EC_BROKEN
1292
Otabek Kasimov15963492020-06-23 21:10:51 -07001293 metrics.Counter(
1294 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001295 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001296 logging.info('We do not have special state for this failure yet :)')
1297 return servo_constants.SERVO_STATE_BROKEN
1298
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001299
Richard Barnetteea3e4602016-06-10 12:36:41 -07001300def make_servo_hostname(dut_hostname):
1301 """Given a DUT's hostname, return the hostname of its servo.
1302
1303 @param dut_hostname: hostname of a DUT.
1304
1305 @return hostname of the DUT's servo.
1306
1307 """
1308 host_parts = dut_hostname.split('.')
1309 host_parts[0] = host_parts[0] + '-servo'
1310 return '.'.join(host_parts)
1311
1312
1313def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001314 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001315
1316 @param servo_hostname: hostname of the servo host.
1317
1318 @return True if it's up, False otherwise
1319 """
1320 # Technically, this duplicates the SSH ping done early in the servo
1321 # proxy initialization code. However, this ping ends in a couple
1322 # seconds when if fails, rather than the 60 seconds it takes to decide
1323 # that an SSH ping has timed out. Specifically, that timeout happens
1324 # when our servo DNS name resolves, but there is no host at that IP.
1325 logging.info('Pinging servo host at %s', servo_hostname)
1326 ping_config = ping_runner.PingConfig(
1327 servo_hostname, count=3,
1328 ignore_result=True, ignore_status=True)
1329 return ping_runner.PingRunner().ping(ping_config).received > 0
1330
1331
Richard Barnettee519dcd2016-08-15 17:37:17 -07001332def _map_afe_board_to_servo_board(afe_board):
1333 """Map a board we get from the AFE to a servo appropriate value.
1334
1335 Many boards are identical to other boards for servo's purposes.
1336 This function makes that mapping.
1337
1338 @param afe_board string board name received from AFE.
1339 @return board we expect servo to have.
1340
1341 """
1342 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1343 BOARD_MAP = {'gizmo': 'panther'}
1344 mapped_board = afe_board
1345 if afe_board in BOARD_MAP:
1346 mapped_board = BOARD_MAP[afe_board]
1347 else:
1348 for suffix in KNOWN_SUFFIXES:
1349 if afe_board.endswith(suffix):
1350 mapped_board = afe_board[0:-len(suffix)]
1351 break
1352 if mapped_board != afe_board:
1353 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1354 return mapped_board
1355
1356
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001357def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001358 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001359
Richard Barnetteea3e4602016-06-10 12:36:41 -07001360 @param dut_host Instance of `Host` on which to find the servo
1361 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001362 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001363 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001364 info = dut_host.host_info_store.get()
1365 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001366 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001367
Garry Wang11b5e872020-03-11 15:14:08 -07001368 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001369 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001370 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1371 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001372 except ValueError:
1373 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001374 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001375 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001376 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001377
1378 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001379 servo_board = _map_afe_board_to_servo_board(info.board)
1380 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001381 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001382 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1383 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001384
1385
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001386def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001387 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1388 in ['localhost', '127.0.0.1']):
1389 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001390 'SSP', 'host_container_ip', type=str, default=None)
1391
1392
Dan Shi023aae32016-05-25 11:13:01 -07001393def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001394 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001395 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001396
Richard Barnette9a26ad62016-06-10 12:03:08 -07001397 This function attempts to create and verify or repair a `ServoHost`
1398 object for a servo connected to the given `dut`, subject to various
1399 constraints imposed by the parameters:
1400 * When the `servo_args` parameter is not `None`, a servo
1401 host must be created, and must be checked with `repair()`.
1402 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1403 true:
1404 * If `try_servo_repair` is true, then create a servo host and
1405 check it with `repair()`.
1406 * Otherwise, if the servo responds to `ping` then create a
1407 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001408
Richard Barnette9a26ad62016-06-10 12:03:08 -07001409 In cases where `servo_args` was not `None`, repair failure
1410 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001411 are logged and then discarded. Note that this only happens in cases
1412 where we're called from a test (not special task) control file that
1413 has an explicit dependency on servo. In that case, we require that
1414 repair not write to `status.log`, so as to avoid polluting test
1415 results.
1416
1417 TODO(jrbarnette): The special handling for servo in test control
1418 files is a thorn in my flesh; I dearly hope to see it cut out before
1419 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001420
1421 Parameters for a servo host consist of a host name, port number, and
1422 DUT board, and are determined from one of these sources, in order of
1423 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001424 * Servo attributes from the `dut` parameter take precedence over
1425 all other sources of information.
1426 * If a DNS entry for the servo based on the DUT hostname exists in
1427 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001428 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001429 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001430 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001431
1432 @param dut An instance of `Host` from which to take
1433 servo parameters (if available).
1434 @param servo_args A dictionary with servo parameters to use if
1435 they can't be found from `dut`. If this
1436 argument is supplied, unrepaired exceptions
1437 from `verify()` will be passed back to the
1438 caller.
1439 @param try_lab_servo If not true, servo host creation will be
1440 skipped unless otherwise required by the
1441 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001442 @param try_servo_repair If true, check a servo host with
1443 `repair()` instead of `verify()`.
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001444 @param dut_host_info: A HostInfo object of the DUT that connected
1445 to this servo.
Dan Shi4d478522014-02-14 13:46:32 -08001446
1447 @returns: A ServoHost object or None. See comments above.
1448
1449 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001450 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001451 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001452 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001453 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001454 if utils.in_moblab_ssp():
1455 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001456 logging.debug(
1457 'Overriding provided servo_args (%s) with arguments'
1458 ' determined from the host (%s)',
1459 servo_args,
1460 servo_args_override,
1461 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001462 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001463
Richard Barnetteea3e4602016-06-10 12:36:41 -07001464 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001465 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001466 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001467 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001468 else:
1469 # For regular test case which not required the servo
1470 return None, None
1471
Garry Wang11b5e872020-03-11 15:14:08 -07001472 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1473 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001474 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1475 logging.debug(
1476 'Servo connection info missed hostname: %s , port: %s',
1477 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001478 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001479 if not is_servo_host_information_valid(servo_hostname, servo_port):
1480 logging.debug(
1481 'Servo connection info is incorrect hostname: %s , port: %s',
1482 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001483 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001484 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001485 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001486 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001487 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001488
Garry Wangebc015b2019-06-06 17:45:06 -07001489 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001490
Otabek Kasimove6df8102020-07-21 20:15:25 -07001491 # Reset or reboot servo device only during AdminRepair tasks.
1492 if try_servo_repair:
1493 if newhost._is_locked:
1494 # Reset servo if the servo is locked, as we check if the servohost
1495 # is up, if the servohost is labstation and if the servohost is in
1496 # lab inside the locking logic.
1497 newhost.reset_servo()
1498 else:
Garry Wang358aad42020-08-02 20:56:04 -07001499 try:
1500 newhost.reboot_servo_v3_on_need()
Garry Wang1f0d5332020-08-10 19:32:32 -07001501 except Exception as e:
1502 logging.info('[Non-critical] Unexpected error while trying to'
1503 ' reboot servo_v3, skipping the reboot; %s', e)
Otabek Kasimove6df8102020-07-21 20:15:25 -07001504
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001505 if dut:
1506 newhost.set_dut_hostname(dut.hostname)
Otabek Kasimov9e90ae12020-08-14 03:01:19 -07001507 if dut_host_info:
1508 newhost.set_dut_host_info(dut_host_info)
Garry Wangffbd2162020-04-17 16:13:48 -07001509
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001510 if try_lab_servo or try_servo_repair:
1511 try:
1512 logging.info("Check and update servo firmware.")
1513 servo_updater.update_servo_firmware(
1514 newhost,
1515 force_update=False)
1516 except Exception as e:
1517 logging.error("Servo device update error: %s", e)
1518
Garry Wangcdd27b22020-01-13 14:59:11 -08001519 try:
1520 newhost.restart_servod(quick_startup=True)
1521 except error.AutoservSSHTimeout:
1522 logging.warning("Restart servod failed due ssh connection "
1523 "to servohost timed out. This error is forgiven"
1524 " here, we will retry in servo repair process.")
1525 except error.AutoservRunError as e:
1526 logging.warning("Restart servod failed due to:\n%s\n"
1527 "This error is forgiven here, we will retry"
1528 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001529
Richard Barnette9a26ad62016-06-10 12:03:08 -07001530 # Note that the logic of repair() includes everything done
1531 # by verify(). It's sufficient to call one or the other;
1532 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001533 if servo_dependency:
1534 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001535 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001536
1537 if try_servo_repair:
1538 try:
1539 newhost.repair()
1540 except Exception:
1541 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001542 else:
1543 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001544 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001545 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001546 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001547 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001548
1549
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001550def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001551 if hostname is None or len(hostname.strip()) == 0:
1552 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001553 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001554 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001555 if not type(port) is int:
1556 try:
1557 int(port)
1558 except ValueError:
1559 return False
1560
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001561 return True
1562
1563
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001564def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001565 """Check if provided servo attributes are valid.
1566
1567 @param hostname Hostname of the servohost.
1568 @param port servo port number.
1569
1570 @returns: A bool value to indicate if provided servo attribute valid.
1571 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001572 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001573 return False
1574 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001575 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001576 if port_int < 1 or port_int > 65000:
1577 return False
1578 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001579 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001580 return False
1581 return True