blob: c79ab0575d96c4575d8a870514a9010562c1b2e8 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng5d518f42013-08-02 14:04:32 -07002# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# Expects to be run in an environment with sudo and no interactive password
7# prompt, such as within the Chromium OS development chroot.
8
9
10"""This file provides core logic for servo verify/repair process."""
11
12
Derek Beckettf73baca2020-08-19 15:08:47 -070013from __future__ import absolute_import
14from __future__ import division
15from __future__ import print_function
16
Fang Deng5d518f42013-08-02 14:04:32 -070017import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060018import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080019import re
20import tarfile
Andrew McRaef0679932020-08-13 09:15:23 +100021import threading
Garry Wang2b5eef92020-08-21 16:23:35 -070022import json
Garry Wangc1288cf2019-12-17 14:58:00 -080023import time
Derek Beckettf73baca2020-08-19 15:08:47 -070024import six
25import six.moves.xmlrpc_client
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070026import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070027
28from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070029from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070030from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070031from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070033from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070034from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000035from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070036from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070037from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070038from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070039from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070040from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080041
Otabek Kasimov15963492020-06-23 21:10:51 -070042try:
43 from chromite.lib import metrics
44except ImportError:
45 metrics = utils.metrics_mock
46
Dan Shi3b2adf62015-09-02 17:46:54 -070047_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070048
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070049
Garry Wangebc015b2019-06-06 17:45:06 -070050class ServoHost(base_servohost.BaseServoHost):
51 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070052 that with a servo instance for a specific port.
53
54 @type _servo: servo.Servo | None
55 """
Fang Deng5d518f42013-08-02 14:04:32 -070056
Raul E Rangel52ca2e82018-07-03 14:10:14 -060057 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070058
Dan Shie5b3c512014-08-21 12:12:09 -070059 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070060 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070061
Otabek Kasimov545739c2020-08-20 00:24:21 -070062 # Default timeout for run terminal command.
63 DEFAULT_TERMINAL_TIMEOUT = 30
64
xixuan6cf6d2f2016-01-29 15:29:00 -080065 # Ready test function
66 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070067
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080068 # Directory prefix on the servo host where the servod logs are stored.
69 SERVOD_LOG_PREFIX = '/var/log/servod'
70
71 # Exit code to use when symlinks for servod logs are not found.
72 NO_SYMLINKS_CODE = 9
73
74 # Directory in the job's results directory to dump the logs into.
75 LOG_DIR = 'servod'
76
77 # Prefix for joint loglevel files in the logs.
78 JOINT_LOG_PREFIX = 'log'
79
80 # Regex group to extract timestamp from logfile name.
81 TS_GROUP = 'ts'
82
83 # This regex is used to extract the timestamp from servod logs.
84 # files always start with log.
85 TS_RE = (r'log.'
86 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
87 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
88 # The loglevel is optional depending on labstation version.
89 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
90 TS_EXTRACTOR = re.compile(TS_RE)
91
92 # Regex group to extract MCU name from logline in servod logs.
93 MCU_GROUP = 'mcu'
94
95 # Regex group to extract logline from MCU logline in servod logs.
96 LINE_GROUP = 'line'
97
98 # This regex is used to extract the mcu and the line content from an
99 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
100 # Here is an example log-line:
101 #
102 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
103 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
104 #
105 # Here is conceptually how they are formatted:
106 #
107 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
108 # <output>
109 #
110 # The log format starts with a timestamp
111 MCU_RE = (r'[\d\-]+ [\d:,]+ '
112 # The mcu that is logging this is next.
113 r'- (?P<%s>\w+) - '
114 # Next, we have more log outputs before the actual line.
115 # Information about the file line, logging function etc.
116 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
117 # NOTE: if the log format changes, this regex needs to be
118 # adjusted.
119 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
120 # Lastly, we get the MCU's console line.
121 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
122 MCU_EXTRACTOR = re.compile(MCU_RE)
123
Otabek Kasimov545739c2020-08-20 00:24:21 -0700124 # Regex to detect timeout messages when USBC pigtail has timeout issue.
125 # e.g.: [475635.427072 PD TMOUT RX 1/1]
126 USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'
127
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800128 # Suffix to identify compressed logfiles.
129 COMPRESSION_SUFFIX = '.tbz2'
130
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700131 # A suffix to mark servod log directories that came from instance that
132 # ran during this servo_host, but are not the last one running e.g. when
133 # an instance (on purpose, or due to a bug) restarted in the middle of the
134 # run.
135 OLD_LOG_SUFFIX = 'old'
136
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700137 # Mapping servo board with their vid-pid
138 SERVO_VID_PID = {
139 'servo_v4':'18d1:501b',
140 'ccd_cr50':'18d1:5014',
141 'servo_micro':'18d1:501a',
142 'servo_v3':['18d1:5004', '0403:6014'],
143 }
144
Otabek Kasimov15963492020-06-23 21:10:51 -0700145 # States of verifiers
146 # True - verifier run and passed
147 # False - verifier run and failed
148 # None - verifier did not run or dependency failed
149 VERIFY_SUCCESS = True
150 VERIFY_FAILED = False
151 VERIFY_NOT_RUN = None
152
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800153 def _init_attributes(self):
154 self._servo_state = None
155 self.servo_port = None
156 self.servo_board = None
157 self.servo_model = None
158 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700159 # The flag that indicate if a servo is connected to a smart usbhub.
160 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
161 # get replaced.
162 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800163 self._servo = None
Andrew McRaef0679932020-08-13 09:15:23 +1000164 self._tunnel_proxy = None
165 self._tunnel_proxy_lock = threading.Lock()
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700166 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800167 # Flag to make sure that multiple calls to close do not result in the
168 # logic executing multiple times.
169 self._closed = False
Andrew McRaef0679932020-08-13 09:15:23 +1000170 # Per-thread local data
171 self._local = threading.local()
Fang Deng5d518f42013-08-02 14:04:32 -0700172
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700173 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700174 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700175 servo_model=None, servo_serial=None, is_in_lab=None,
176 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700177 """Initialize a ServoHost instance.
178
179 A ServoHost instance represents a host that controls a servo.
180
181 @param servo_host: Name of the host where the servod process
182 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600183 @param servo_port: Port the servod process is listening on. Defaults
184 to the SERVOD_PORT environment variable if set,
185 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700186 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700187 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800188 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
189 to None, for which utils.host_is_in_lab_zone will be
190 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700191
192 """
193 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700194 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800195 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700196 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700197 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700198 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700199 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800200
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800201 # The location of the log files on the servo host for this instance.
202 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
203 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700204 # Path of the servo host lock file.
Derek Beckettf73baca2020-08-19 15:08:47 -0700205 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
206 self.LOCK_FILE_POSTFIX)
Garry Wang79e9af62019-06-12 15:19:19 -0700207 # File path to declare a reboot request.
Derek Beckettf73baca2020-08-19 15:08:47 -0700208 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
209 self.REBOOT_FILE_POSTFIX)
Garry Wang79e9af62019-06-12 15:19:19 -0700210
211 # Lock the servo host if it's an in-lab labstation to prevent other
212 # task to reboot it until current task completes. We also wait and
213 # make sure the labstation is up here, in the case of the labstation is
214 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700215 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700216 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
217 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700218 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700219
Richard Barnette9a26ad62016-06-10 12:03:08 -0700220 self._repair_strategy = (
221 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700222
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700223 def __str__(self):
224 return "<%s '%s:%s'>" % (
225 type(self).__name__, self.hostname, self.servo_port)
226
Richard Barnette9a26ad62016-06-10 12:03:08 -0700227 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700228 """ Initialize and setup servo for later use.
229 """
230 self.initilize_servo()
231 self.initialize_dut_for_servo()
232
233
234 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700235 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700236
237 Initializes `self._servo` and then verifies that all network
238 connections are working. This will create an ssh tunnel if
239 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700240 """
241 self._servo = servo.Servo(servo_host=self,
242 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700243
Garry Wang8c8dc972020-06-09 13:41:51 -0700244
245 def initialize_dut_for_servo(self):
246 """This method will do some setup for dut control, e.g. setup
247 main servo_v4 device, and also testing the connection between servo
248 and DUT. As a side effect of testing the connection, all signals on
249 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700250 set to the neutral (off) position.
251 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700252 if not self._servo:
253 raise hosts.AutoservVerifyError('Servo object needs to be'
254 ' initialized before initialize'
255 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700256 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700257 self._servo.initialize_dut,
258 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700259 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700260 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
261 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700262
263
264 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700265 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700266
267 If we've previously successfully connected to our servo,
268 disconnect any established ssh tunnel, and set `self._servo`
269 back to `None`.
270 """
271 if self._servo:
272 # N.B. This call is safe even without a tunnel:
273 # rpc_server_tracker.disconnect() silently ignores
274 # unknown ports.
275 self.rpc_server_tracker.disconnect(self.servo_port)
276 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700277
Garry Wangc1288cf2019-12-17 14:58:00 -0800278
Andrew McRaef0679932020-08-13 09:15:23 +1000279 def _maybe_create_servod_ssh_tunnel_proxy(self):
280 """Create a xmlrpc proxy for use with a ssh tunnel.
281 A lock is used to safely create a singleton proxy.
282 """
283 with self._tunnel_proxy_lock:
284 if self._tunnel_proxy is None:
285 self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
286 None,
287 self.servo_port,
288 ready_test_name=self.SERVO_READY_METHOD,
289 timeout_seconds=60,
290 request_timeout_seconds=3600,
291 server_desc=str(self))
292
293
294 def get_servod_server_proxy(self):
295 """Return a proxy if it exists; otherwise, create a new one.
296 A proxy can either be a ssh tunnel based proxy, or a httplib
297 based proxy.
Fang Deng5d518f42013-08-02 14:04:32 -0700298
299 @returns: An xmlrpclib.ServerProxy that is connected to the servod
300 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700301 """
Garry Wang11b5e872020-03-11 15:14:08 -0700302 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
303 and not self.is_localhost()):
Andrew McRaef0679932020-08-13 09:15:23 +1000304 # Check for existing ssh tunnel proxy.
305 if self._tunnel_proxy is None:
306 self._maybe_create_servod_ssh_tunnel_proxy()
307 return self._tunnel_proxy
Richard Barnette9a26ad62016-06-10 12:03:08 -0700308 else:
Andrew McRaef0679932020-08-13 09:15:23 +1000309 # xmlrpc/httplib is not thread-safe, so each thread must have its
310 # own separate proxy connection.
311 if not hasattr(self._local, "_per_thread_proxy"):
312 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
Derek Beckettf73baca2020-08-19 15:08:47 -0700313 self._local._per_thread_proxy = six.moves.xmlrpc_client.ServerProxy(remote)
Andrew McRaef0679932020-08-13 09:15:23 +1000314 return self._local._per_thread_proxy
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800315
316
Richard Barnette1edbb162016-11-01 11:47:50 -0700317 def verify(self, silent=False):
318 """Update the servo host and verify it's in a good state.
319
320 @param silent If true, suppress logging in `status.log`.
321 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700322 message = 'Beginning verify for servo host %s port %s serial %s'
323 message %= (self.hostname, self.servo_port, self.servo_serial)
324 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700325 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700326 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700327 self._servo_state = servo_constants.SERVO_STATE_WORKING
328 self.record('INFO', None, None,
329 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700330 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700331 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700332 self._servo_state = self.determine_servo_state()
333 self.record('INFO', None, None,
334 'ServoHost verify set servo_state as %s'
335 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700336 if self._is_critical_error(e):
337 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700338
Garry Wang2b5eef92020-08-21 16:23:35 -0700339 def _get_default_usbkey_mount_path(self):
340 return '/media/servo_usb/%s' % self.servo_port
Fang Deng5d518f42013-08-02 14:04:32 -0700341
Garry Wang7b0e1b72020-03-25 19:08:59 -0700342 def get_image_name_from_usbkey(self, usbkey_dev):
343 """Mount usb drive and check ChromeOS image name on it if there is
344 one. This method assumes the image_usbkey_direction is already set
345 to servo side.
346
347 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
348
349 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
350 or empty string if no test image detected, or unexpected
351 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700352 """
Garry Wang70e5d062020-04-03 18:01:05 -0700353 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang2b5eef92020-08-21 16:23:35 -0700354 mount_dst = self._get_default_usbkey_mount_path()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700355 # Unmount if there is an existing stale mount.
Garry Wang2b5eef92020-08-21 16:23:35 -0700356 self._unmount_drive(mount_dst)
357 # ChromeOS root fs is in /dev/sdx3
358 mount_src = usbkey_dev + '3'
Garry Wang7b0e1b72020-03-25 19:08:59 -0700359 try:
Garry Wang2b5eef92020-08-21 16:23:35 -0700360 if not self._mount_drive(mount_src, mount_dst):
361 logging.debug('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700362 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700363
364 release_content = self.run(
Garry Wang2b5eef92020-08-21 16:23:35 -0700365 'cat %s/etc/lsb-release' % mount_dst,
Garry Wang70e5d062020-04-03 18:01:05 -0700366 ignore_status=True).stdout.strip()
367
368 if not re.search(r'RELEASE_TRACK=.*test', release_content):
369 logging.info('The image on usbkey is not a test image')
370 return ''
371
372 return lsbrelease_utils.get_chromeos_release_builder_path(
373 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700374 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700375 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang2b5eef92020-08-21 16:23:35 -0700376 self._unmount_drive(mount_dst)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700377
Garry Wang2b5eef92020-08-21 16:23:35 -0700378 def _extract_firmware_image_from_usbkey(self, fw_dst):
379 """Extract firmware images from the usbkey on servo, this method
380 assumes there is already a ChromeOS test image staged on servo.
381
382 @param: fw_dst the path that we'll copy firmware images to.
383
384 @returns: a json format string of firmware manifest data.
385 """
386 usbkey_dev = self._probe_and_validate_usb_dev()
387 if not usbkey_dev:
388 raise hosts.AutoservRepairError('Unexpected error occurred when'
389 ' probe usbkey dev path, please check logs for detail.')
390
391 mount_dst = self._get_default_usbkey_mount_path()
392 # Unmount if there is an existing stale mount.
393 self._unmount_drive(mount_dst)
394 # ChromeOS root fs is in /dev/sdx3
395 mount_src = usbkey_dev + '3'
396 try:
397 if not self._mount_drive(mount_src, mount_dst):
398 raise hosts.AutoservRepairError('Failed to extract firmware'
399 ' image; Unable to mount %s.' % usbkey_dev,
400 'unable to mount usbkey')
401 updater_bin = os.path.join(mount_dst,
402 'usr/sbin/chromeos-firmwareupdate')
403 self.run('%s --unpack %s' % (updater_bin, fw_dst))
404 return self.run('%s --manifest' % updater_bin).stdout
405 finally:
406 self._unmount_drive(mount_dst)
407
408 def prepare_repair_firmware_image(self, fw_dst=None):
409 """Prepare firmware image on the servohost for auto repair process
410 to consume.
411
412 @param: fw_dst the path that we want to store firmware image on
413 the servohost.
414
415 @returns: A tuple that containes ec firmware image path and bios
416 firmware image path on the servohost, or None if type of
417 image is not available based on manifest and dut's model.
418 """
419 model = self.servo_model or self._dut_host_info.model
420 if not model:
421 raise hosts.AutoservRepairError(
422 'Could not determine DUT\'s model.',
423 'model infomation unknown')
424
425 if not fw_dst:
426 fw_dst = '/tmp/firmware_image/%s' % self.servo_port
427 # Cleanup and re-create dst path to have a fresh start.
428 self.run('rm -rf %s' % fw_dst)
429 self.run('mkdir -p %s' % fw_dst)
430
431 manifest = json.loads(self._extract_firmware_image_from_usbkey(fw_dst))
432 model_manifest = manifest.get(model)
433 if not model_manifest:
434 raise hosts.AutoservRepairError('Could not find firmware manifest'
435 ' for model:%s' % model, 'model manifest not found')
436 try:
437 ec_image = os.path.join(fw_dst, model_manifest['ec']['image'])
438 except KeyError:
439 ec_image = None
440 try:
441 bios_image = os.path.join(fw_dst, model_manifest['host']['image'])
442 except KeyError:
443 bios_image = None
444 if not ec_image and not bios_image:
445 raise hosts.AutoservRepairError('Could not find any firmware image'
446 ' for model:%s' % model, 'cannot find firmware image')
447 return ec_image, bios_image
Garry Wang7b0e1b72020-03-25 19:08:59 -0700448
Garry Wang70e5d062020-04-03 18:01:05 -0700449 def _probe_and_validate_usb_dev(self):
450 """This method probe the usb dev path by talking to servo, and then
451 validate the dev path is valid block device to servohost.
452 Possible output:
453 1. Encounter error during probe usb dev, returns empty string.
454 2. probe usb dev completed without error but cannot find usb dev,
455 raise AutoservRepairError.
456 3. probe usb dev find a usb dev path, but failed validation in this
457 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700458
Garry Wang70e5d062020-04-03 18:01:05 -0700459 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
460 if unexpected error occurred during probe.
461 @raises: AutoservRepairError if servo couldn't probe the usb dev path
462 (servo.probe_host_usb_dev() returns empty string), or the dev path is
463 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700464 """
465 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700466 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700467 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700468 except Exception as e:
469 # We don't want any unexpected or transient servo communicating
470 # failure block usb repair, so capture all errors here.
471 logging.error(e, exc_info=True)
472 logging.error('Unexpected error occurred on get usbkey dev path,'
473 ' skipping usbkey validation.')
474 return ''
475
Garry Wang70e5d062020-04-03 18:01:05 -0700476 if usb_dev:
477 # probe_host_usb_dev() sometimes return stale record,
478 # so we need to make sure the path exists in fdisk.
479 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700480 try:
481 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
482 if resp.exit_status == 0:
483 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700484
Garry Wang11441182020-06-16 18:34:14 -0700485 logging.error('%s is reported from "image_usbkey_dev" control'
486 ' but not detected by fdisk!', usb_dev)
487 except error.AutoservRunError as e:
488 if 'Timeout encountered' in str(e):
489 logging.warning('Timeout encountered during fdisk run,'
490 ' skipping usbkey validation.')
491 return ''
492 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700493
494 raise hosts.AutoservRepairError(
495 'No usbkey detected on servo, the usbkey may be either missing'
496 ' or broken. Please replace usbkey on the servo and retry.',
497 'missing usbkey')
498
499
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700500 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700501 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700502 if self.servo_board:
503 try:
504 frm_config = config.Config(self.servo_board, self.servo_model)
505 return frm_config.chrome_ec
506 except Exception as e:
507 logging.error('Unexpected error when read from firmware'
508 ' configs; %s', str(e))
509 return False
510
Garry Wang70e5d062020-04-03 18:01:05 -0700511 def validate_image_usbkey(self):
512 """This method first validate if there is a recover usbkey on servo
513 that accessible to servohost, and second check if a ChromeOS image is
514 already on the usb drive and return the image_name so we can avoid
515 unnecessary download and flash to the recover usbkey on servo.
516
517 Please note that, there is special error handling logic here:
518 1. If unexpected error happens, we return empty string. So repair
519 actions will not get blocked.
520 2. If no working usbkey present on servo, but no errors, we'll raise
521 AutoservRepairError here.
522
523 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
524 or empty string if no test image detected, or unexpected
525 error occurred.
526 @raises: AutoservRepairError if the usbkey is not detected on servo.
527 """
528 usb_dev = self._probe_and_validate_usb_dev()
529 if usb_dev:
530 return self.get_image_name_from_usbkey(usb_dev)
531 else:
532 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700533
Richard Barnette1edbb162016-11-01 11:47:50 -0700534 def repair(self, silent=False):
535 """Attempt to repair servo host.
536
537 @param silent If true, suppress logging in `status.log`.
538 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700539 message = 'Beginning repair for servo host %s port %s serial %s'
540 message %= (self.hostname, self.servo_port, self.servo_serial)
541 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700542 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700543 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700544 self._servo_state = servo_constants.SERVO_STATE_WORKING
545 self.record('INFO', None, None,
546 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700547 # If target is a labstation then try to withdraw any existing
548 # reboot request created by this servo because it passed repair.
549 if self.is_labstation():
550 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700551 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700552 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700553 self._servo_state = self.determine_servo_state()
554 self.record('INFO', None, None,
555 'ServoHost repair set servo_state as %s'
556 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700557 if self._is_critical_error(e):
558 self.disconnect_servo()
559 self.stop_servod()
560 raise
561
562
563 def _is_critical_error(self, error):
564 if (isinstance(error, hosts.AutoservVerifyDependencyError)
565 and not error.is_critical()):
566 logging.warning('Non-critical verify failure(s) detected during'
567 ' verify/repair servo, servo connection will'
568 ' still up but may not fully functional.'
569 ' Some repair actions and servo depended'
570 ' tests may not run.')
571 return False
572 logging.info('Critical verify failure(s) detected during repair/verify'
573 ' servo. Disconnecting servo and stop servod, all repair '
574 'action and tests that depends on servo will not run.')
575 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700576
577
Dan Shi4d478522014-02-14 13:46:32 -0800578 def get_servo(self):
579 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700580
Dan Shi4d478522014-02-14 13:46:32 -0800581 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700582 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700583 """
Dan Shi4d478522014-02-14 13:46:32 -0800584 return self._servo
585
586
Garry Wang79e9af62019-06-12 15:19:19 -0700587 def request_reboot(self):
588 """Request servohost to be rebooted when it's safe to by touch a file.
589 """
590 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700591 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700592 self.run('touch %s' % self._reboot_file, ignore_status=True)
593
594
Garry Wang464ff1e2019-07-18 17:20:34 -0700595 def withdraw_reboot_request(self):
596 """Withdraw a servohost reboot request if exists by remove the flag
597 file.
598 """
599 logging.debug('Withdrawing request to reboot servohost %s that created'
600 ' by servo with port # %s if exists.',
601 self.hostname, self.servo_port)
602 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
603
604
Garry Wangc1288cf2019-12-17 14:58:00 -0800605 def start_servod(self, quick_startup=False):
606 """Start the servod process on servohost.
607 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800608 # Skip if running on the localhost.(crbug.com/1038168)
609 if self.is_localhost():
610 logging.debug("Servohost is a localhost, skipping start servod.")
611 return
612
613 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800614 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800615 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800616 if self.servo_model:
617 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800618 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800619 logging.warning('Board for DUT is unknown; starting servod'
620 ' assuming a pre-configured board.')
621
622 cmd += ' PORT=%d' % self.servo_port
623 if self.servo_serial:
624 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800625
626 # Start servod with dual_v4 if the DUT/servo from designated pools.
627 dut_host_info = self.get_dut_host_info()
628 if dut_host_info:
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700629 # DUAL_V4: servo setup includes servo_micro and ccd_cr50
630 # connection to the DUT
631 is_dual_setup = False
Garry Wang11b5e872020-03-11 15:14:08 -0700632 if bool(dut_host_info.pools &
633 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800634 logging.debug('The DUT is detected in following designated'
635 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700636 servo_constants.POOLS_SUPPORT_DUAL_V4)
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700637 is_dual_setup = True
638 elif dut_host_info.attributes.get('servo_setup') == 'DUAL_V4':
639 logging.debug('The DUT servo setup specified in config as '
640 ' "DUAL_V4"')
641 is_dual_setup = True
642 if is_dual_setup:
Garry Wangd7367482020-02-27 13:52:40 -0800643 cmd += ' DUAL_V4=1'
644
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800645 # Remove the symbolic links from the logs. This helps ensure that
646 # a failed servod instantiation does not cause us to grab old logs
647 # by mistake.
648 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800649 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800650
651 # There's a lag between when `start servod` completes and when
652 # the _ServodConnectionVerifier trigger can actually succeed.
653 # The call to time.sleep() below gives time to make sure that
654 # the trigger won't fail after we return.
655
656 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
657 # But in the rare case all servo on a labstation are in heavy use they
658 # may take ~30 seconds. So the timeout value will double these value,
659 # and we'll try quick start up when first time initialize servohost,
660 # and use standard start up timeout in repair.
661 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700662 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800663 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700664 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800665 logging.debug('Wait %s seconds for servod process fully up.', timeout)
666 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700667 # Cache the initial instance timestamp to check against servod restarts
668 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800669
670
671 def stop_servod(self):
672 """Stop the servod process on servohost.
673 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800674 # Skip if running on the localhost.(crbug.com/1038168)
675 if self.is_localhost():
676 logging.debug("Servohost is a localhost, skipping stop servod.")
677 return
678
Garry Wangc1288cf2019-12-17 14:58:00 -0800679 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800680 self.run('stop servod PORT=%d' % self.servo_port,
681 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800682 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700683 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
684 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800685
686
687 def restart_servod(self, quick_startup=False):
688 """Restart the servod process on servohost.
689 """
690 self.stop_servod()
691 self.start_servod(quick_startup)
692
Garry Wang11b5e872020-03-11 15:14:08 -0700693
Garry Wangffbd2162020-04-17 16:13:48 -0700694 def _process_servodtool_error(self, response):
695 """Helper function to handle non-zero servodtool response.
696 """
697 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700698 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700699 logging.error('The servo is not plugged on a usb hub that supports'
700 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700701 # change the flag so we can update this label in later process.
702 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700703 return
704
705 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
706 self.servo_serial, response.stdout):
707 logging.error('No servo with serial %s found!', self.servo_serial)
708 return
709
710 logging.error('Unexpected error occurred from usbhub control, please'
711 ' file a bug and inform chrome-fleet-software@ team!')
712
Otabek Kasimov545739c2020-08-20 00:24:21 -0700713 def _is_usbc_pigtail_connection_timeout(self):
714 """Check if servo has issue with USBC pigtail connection timeout.
715
716 The usb_console has to be clean for good servo. If console generate
717 messages like (below) then issue is present:
718 [475635.427072 PD TMOUT RX 1/1]
719 RXERR1 Preamble
720 [475635.476044 PD TMOUT RX 1/1]
721 RXERR1 Preamble
722 """
723 if not self.servo_serial:
724 return False
725 logging.debug('Starting check if USBC pigtail connection timeout.')
726 try:
727 cmd = 'usb_console -d 18d1:501b -s %s' % self.servo_serial
728 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
729 result_lines = resp.stdout.splitlines()
730 for line in result_lines:
731 if re.match(self.USBC_PIGTAIL_TIMEOUT_RE, line):
732 return True
733 except Exception as e:
734 logging.debug('(Non-critical) %s.', e)
735 return False
736
737 def _reset_usbc_pigtail_connection(self):
738 """Reset USBC pigtail connection on servo board.
739
740 To reset need to run 'cc off' and then 'cc srcdts' in usb_console.
741 """
742 if not self.servo_serial:
743 return False
744 logging.debug('Starting reset USBC pigtail connection.')
745 def _run_command(cc_command):
746 """Run configuration chanel commands.
747
748 @returns: True if pas successful and False if fail.
749 """
750 try:
751 cmd = (r"echo 'cc %s' | usb_console -d 18d1:501b -s %s"
752 % (cc_command, self.servo_serial))
753 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
754 return True
755 except Exception as e:
756 logging.info('(Non-critical) %s.', e)
757 return False
758
759 logging.info('Turn off configuration channel. And wait 5 seconds.')
760 if _run_command('off'):
761 # wait till command will be effected
762 time.sleep(5)
763 logging.info('Turn on configuration channel. '
764 'And wait 15 seconds.')
765 if _run_command('srcdts'):
766 # wait till command will be effected
767 time.sleep(15)
768
769 def reset_usbc_pigtail_connection_on_need(self):
770 """Reset USBC pitgtail issue if it present."""
771 if not self.is_labstation():
772 logging.info('USBC pigtail reset applicable only for labstations')
773 return
774
775 if self._is_usbc_pigtail_connection_timeout():
776 logging.info('USBC pigtail issue detected on servo.')
777 self._reset_usbc_pigtail_connection()
778 fields = self._get_host_metrics_data()
779 fields['success'] = not self._is_usbc_pigtail_connection_timeout()
780 metrics.Counter(
781 'chromeos/autotest/repair/servo_usbc/reset'
782 ).increment(fields=fields)
Garry Wangffbd2162020-04-17 16:13:48 -0700783
784 def _get_servo_usb_devnum(self):
785 """Helper function to collect current usb devnum of servo.
786 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700787 # TODO remove try-except when fix crbug.com/1087964
788 try:
789 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
790 resp = self.run(cmd, ignore_status=True, timeout=30)
791 except Exception as e:
792 # Here we catch only timeout errors.
793 # Other errors is filtered by ignore_status=True
794 logging.debug('Attempt to get servo usb-path failed due to '
795 'timeout; %s', e)
796 return ''
797
Garry Wangffbd2162020-04-17 16:13:48 -0700798 if resp.exit_status != 0:
799 self._process_servodtool_error(resp)
800 return ''
801 usb_path = resp.stdout.strip()
802 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
803
804 resp = self.run('cat %s/devnum' % usb_path,
805 ignore_status=True)
806 if resp.exit_status != 0:
807 self._process_servodtool_error(resp)
808 return ''
809 return resp.stdout.strip()
810
Garry Wang358aad42020-08-02 20:56:04 -0700811 def reboot_servo_v3_on_need(self):
812 """Check and reboot servo_v3 based on below conditions.
813 1. If there is an update pending on reboot.
814 2. Servo_v3 has been up for more than 96 hours.
815 """
816 if self.get_board() != 'beaglebone_servo':
817 logging.info('Servo reboot is only applicable for servo V3.')
Otabek Kasimove6df8102020-07-21 20:15:25 -0700818 return
819
Garry Wang358aad42020-08-02 20:56:04 -0700820 update_pending_reboot = (self._check_update_status() ==
821 self.UPDATE_STATE.PENDING_REBOOT)
822 uptime_hours = float(self.check_uptime())/3600
823 logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
824 long_up_time = uptime_hours > 96
825
826 # Skip reboot if neither condition are met.
827 if not (update_pending_reboot or long_up_time):
Otabek Kasimove6df8102020-07-21 20:15:25 -0700828 return
829
Garry Wang358aad42020-08-02 20:56:04 -0700830 if update_pending_reboot:
831 message = 'Starting reboot servo_v3 because an update is pending.'
832 reboot_method = self._post_update_reboot
833 elif long_up_time:
834 message = 'Starting reboot servo_v3 because uptime > 96 hours.'
835 reboot_method = self._servo_host_reboot
836 self.record('INFO', None, None, message)
837 logging.info(message)
Otabek Kasimove6df8102020-07-21 20:15:25 -0700838 try:
Garry Wang358aad42020-08-02 20:56:04 -0700839 reboot_method()
Otabek Kasimove6df8102020-07-21 20:15:25 -0700840 message = 'Servo_v3 reboot completed successfully.'
841 except Exception as e:
842 logging.debug("Fail to reboot servo_v3; %s", e)
843 message = ('Servo_v3 reboot failed, please check debug log '
844 'for details.')
845 logging.info(message)
846 self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700847
848 def _reset_servo(self):
849 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700850 # TODO remove try-except when fix crbug.com/1087964
851 try:
852 resp = self.run('servodtool device -s %s power-cycle' %
853 self.servo_serial, ignore_status=True,
854 timeout=30)
855 if resp.exit_status != 0:
856 self._process_servodtool_error(resp)
857 return False
858 except Exception as e:
859 # Here we catch only timeout errors.
860 # Other errors is filtered by ignore_status=True
861 logging.debug('Attempt to reset servo failed due to timeout;'
862 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700863 return False
864
865 logging.debug('Wait %s seconds for servo to come back from reset.',
866 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
867 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700868 # change the flag so we can update this label in later process.
869 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700870 return True
871
872
873 def reset_servo(self):
874 """Reset(power-cycle) the servo via smart usbhub.
875 """
876 if not self.is_labstation():
877 logging.info('Servo reset is not applicable to servo_v3.')
878 return
879
880 pre_reset_devnum = self._get_servo_usb_devnum()
881 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
882 result = self._reset_servo()
883 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700884 message = ('Failed to reset servo with serial: %s. (Please ignore'
885 ' this error if the DUT is not connected to a smart'
886 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700887 logging.warning(message)
888 self.record('INFO', None, None, message)
889 return
890
891 post_reset_devnum = self._get_servo_usb_devnum()
892 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
893 if not (pre_reset_devnum and post_reset_devnum):
894 message = ('Servo reset completed but unable to verify'
895 ' devnum change!')
896 elif pre_reset_devnum != post_reset_devnum:
897 message = ('Reset servo with serial %s completed successfully!'
898 % self.servo_serial)
899 else:
900 message = 'Servo reset completed but devnum is still not changed!'
901 logging.info(message)
902 self.record('INFO', None, None, message)
903
904
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800905 def _extract_compressed_logs(self, logdir, relevant_files):
906 """Decompress servod logs in |logdir|.
907
908 @param logdir: directory containing compressed servod logs.
909 @param relevant_files: list of files in |logdir| to consider.
910
911 @returns: tuple, (tarfiles, files) where
912 tarfiles: list of the compressed filenames that have been
913 extracted and deleted
914 files: list of the uncompressed files that were generated
915 """
916 # For all tar-files, first extract them to the directory, and
917 # then let the common flow handle them.
918 tarfiles = [cf for cf in relevant_files if
919 cf.endswith(self.COMPRESSION_SUFFIX)]
920 files = []
921 for f in tarfiles:
922 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
923 with tarfile.open(f) as tf:
924 # Each tarfile has only one member, as
925 # that's the compressed log.
926 member = tf.members[0]
927 # Manipulate so that it only extracts the basename, and not
928 # the directories etc.
929 member.name = norm_name
930 files.append(os.path.join(logdir, member.name))
931 tf.extract(member, logdir)
932 # File has been extracted: remove the compressed file.
933 os.remove(f)
934 return tarfiles, files
935
936 def _extract_mcu_logs(self, log_subdir):
937 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
938
939 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
940 lines from the logs to generate invidiual console logs e.g. after
941 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
942 those MCUs had any console input/output.
943
944 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
945 """
946 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
947 # files
948 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
949 if not os.path.exists(mcu_lines_file):
950 logging.info('No DEBUG logs found to extract MCU logs from.')
951 return
952 mcu_files = {}
953 mcu_file_template = '%s.txt'
954 with open(mcu_lines_file, 'r') as f:
955 for line in f:
956 match = self.MCU_EXTRACTOR.match(line)
957 if match:
958 mcu = match.group(self.MCU_GROUP).lower()
959 line = match.group(self.LINE_GROUP)
960 if mcu not in mcu_files:
961 mcu_file = os.path.join(log_subdir,
962 mcu_file_template % mcu)
963 mcu_files[mcu] = open(mcu_file, 'a')
964 fd = mcu_files[mcu]
965 fd.write(line + '\n')
966 for f in mcu_files:
967 mcu_files[f].close()
968
969
970 def remove_latest_log_symlinks(self):
971 """Remove the conveninence symlinks 'latest' servod logs."""
972 symlink_wildcard = '%s/latest*' % self.remote_log_dir
973 cmd = 'rm ' + symlink_wildcard
974 self.run(cmd, stderr_tee=None, ignore_status=True)
975
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700976 def probe_servod_restart(self, instance_ts, outdir):
977 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800978
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700979 If since the last time this host called start_servod() servod crashed
980 and restarted, this helper finds those logs as well, and stores them
981 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000982
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700983 It also issues a panicinfo command to servo devices after the restart
984 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800985
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700986 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800987 @param outdir: directory to create a subdirectory into to place the
988 servod logs into.
989 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700990 if self._initial_instance_ts is None:
991 logging.info('No log timestamp grabbed successfully on servod '
992 'startup. Cannot check device restarts. Ignoring.')
993 return
994 if instance_ts == self._initial_instance_ts:
995 logging.debug('Servod appears to have run without restarting')
996 return
997 # Servod seems to have restarted (at least once). |_initial_instance_ts|
998 # is the first timestamp, and instance_ts is the current timestamp. Find
999 # all timestamps in between them, and grab the logs for each.
1000 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
1001 instance_ts)
1002 logging.info('Servod has restarted %d times between the start and the '
1003 'end of this servo_host.', len(tss))
1004 logging.info('This might be an issue. Will extract all logs from each '
1005 'instance.')
1006 logging.info('Logs that are not the currently running (about to turn '
1007 'down) instance are maked with a .%s in their folder.',
1008 self.OLD_LOG_SUFFIX)
1009 for ts in tss:
1010 self.get_instance_logs(ts, outdir, old=True)
1011 # Lastly, servod has restarted due to a potential issue. Try to get
1012 # panic information from servo micro and servo v4 for the current logs.
1013 # This can only happen if the |_servo| attribute is initialized.
1014 if self._servo:
1015 for mcu in ['servo_micro', 'servo_v4']:
1016 ctrl = '%s_uart_cmd' % mcu
1017 if self._servo.has_control(ctrl):
1018 logging.info('Trying to retrieve %r panicinfo into logs',
1019 mcu)
1020 try:
1021 self._servo.set_nocheck(ctrl, 'panicinfo')
1022 except error.TestFail as e:
1023 logging.error('Failed to generate panicinfo for %r '
1024 'logs. %s', mcu, str(e))
1025
1026 def _find_instance_timestamps_between(self, start_ts, end_ts):
1027 """Find all log timestamps between [start_ts, end_ts).
1028
1029 @param start_ts: str, earliest log timestamp of interest
1030 @param end_ts: str, latest log timestamp of interest
1031
1032 @returns: list, all timestamps between start_ts and end_ts, end_ts
1033 exclusive, on the servo_host. An empty list on errors
1034 """
1035 # Simply get all timestamp, and then sort and remove
1036 cmd = 'ls %s' % self.remote_log_dir
1037 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1038 if res.exit_status != 0:
1039 # Here we failed to find anything.
1040 logging.info('Failed to find remote servod logs. Ignoring.')
1041 return []
1042 logfiles = res.stdout.strip().split()
1043 timestamps = set()
1044 for logfile in logfiles:
1045 ts_match = self.TS_EXTRACTOR.match(logfile)
1046 if not ts_match:
1047 # Simply ignore files that fail the check. It might be the
1048 # 'latest' symlinks or random files.
1049 continue
1050 timestamps.add(ts_match.group(self.TS_GROUP))
1051 # At this point we have all unique timestamps.
1052 timestamps = sorted(timestamps)
1053 for ts in [start_ts, end_ts]:
1054 if ts not in timestamps:
1055 logging.error('Timestamp %r not in servod logs. Cannot query '
1056 'for timestamps in between %r and %r', ts,
1057 start_ts, end_ts)
1058 return []
1059 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
1060
1061 def get_instance_logs_ts(self):
1062 """Retrieve the currently running servod instance's log timestamp
1063
1064 @returns: str, timestamp for current instance, or None on failure
1065 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001066 # First, extract the timestamp. This cmd gives the real filename of
1067 # the latest aka current log file.
1068 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
1069 'then realpath %(dir)s/latest.DEBUG;'
1070 'elif [ -f %(dir)s/latest ];'
1071 'then realpath %(dir)s/latest;'
1072 'else exit %(code)d;'
1073 'fi' % {'dir': self.remote_log_dir,
1074 'code': self.NO_SYMLINKS_CODE})
1075 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1076 if res.exit_status != 0:
1077 if res.exit_status == self.NO_SYMLINKS_CODE:
1078 logging.warning('servod log latest symlinks not found. '
1079 'This is likely due to an error starting up '
1080 'servod. Ignoring..')
1081 else:
1082 logging.warning('Failed to find servod logs on servo host.')
1083 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001084 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001085 fname = os.path.basename(res.stdout.strip())
1086 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -08001087 ts_match = self.TS_EXTRACTOR.match(fname)
1088 if not ts_match:
1089 logging.warning('Failed to extract timestamp from servod log file '
1090 '%r. Skipping. The servo host is using outdated '
1091 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001092 return None
1093 return ts_match.group(self.TS_GROUP)
1094
1095 def get_instance_logs(self, instance_ts, outdir, old=False):
1096 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
1097
1098 This method first collects all logs on the servo_host side pertaining
1099 to this servod instance (port, instatiation). It glues them together
1100 into combined log.[level].txt files and extracts all available MCU
1101 console I/O from the logs into individual files e.g. servo_v4.txt
1102
1103 All the output can be found in a directory inside |outdir| that
1104 this generates based on |LOG_DIR|, the servod port, and the instance
1105 timestamp on the servo_host side.
1106
1107 @param instance_ts: log timestamp to grab logfiles for
1108 @param outdir: directory to create a subdirectory into to place the
1109 servod logs into.
1110 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
1111 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001112 # Create the local results log dir.
1113 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
1114 str(self.servo_port),
1115 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001116 if old:
1117 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
1118 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001119 os.mkdir(log_dir)
1120 # Now, get all files with that timestamp.
1121 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
1122 instance_ts)
1123 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1124 files = res.stdout.strip().split()
1125 try:
1126 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001127 if not os.listdir(log_dir):
1128 logging.info('No servod logs retrieved. Ignoring, and removing '
1129 '%r again.', log_dir)
1130 os.rmdir(log_dir)
1131 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001132 except error.AutoservRunError as e:
1133 result = e.result_obj
1134 if result.exit_status != 0:
1135 stderr = result.stderr.strip()
1136 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
1137 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001138 # Remove the log_dir as nothing was added to it.
1139 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001140 return
1141 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
1142 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
1143 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
1144 # Create the joint files for each loglevel. i.e log.DEBUG
1145 joint_file = self.JOINT_LOG_PREFIX
1146 if level_name:
1147 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
1148 # This helps with some online tools to avoid complaints about an
1149 # unknown filetype.
1150 joint_file = joint_file + '.txt'
1151 joint_path = os.path.join(log_dir, joint_file)
1152 files = [f for f in local_files if level_name in f]
1153 if not files:
1154 # TODO(crrev.com/c/1793030): remove no-level case once CL
1155 # is pushed
1156 continue
1157 # Extract compressed logs if any.
1158 compressed, extracted = self._extract_compressed_logs(log_dir,
1159 files)
1160 files = list(set(files) - set(compressed))
1161 files.extend(extracted)
1162 # Need to sort. As they all share the same timestamp, and
1163 # loglevel, the index itself is sufficient. The highest index
1164 # is the oldest file, therefore we need a descending sort.
1165 def sortkey(f, level=level_name):
1166 """Custom sortkey to sort based on rotation number int."""
1167 if f.endswith(level_name): return 0
1168 return int(f.split('.')[-1])
1169
1170 files.sort(reverse=True, key=sortkey)
1171 # Just rename the first file rather than building from scratch.
1172 os.rename(files[0], joint_path)
1173 with open(joint_path, 'a') as joint_f:
1174 for logfile in files[1:]:
1175 # Transfer the file to the joint file line by line.
1176 with open(logfile, 'r') as log_f:
1177 for line in log_f:
1178 joint_f.write(line)
1179 # File has been written over. Delete safely.
1180 os.remove(logfile)
1181 # Need to remove all files form |local_files| so we don't
1182 # analyze them again.
1183 local_files = list(set(local_files) - set(files) - set(compressed))
1184 # Lastly, extract MCU logs from the joint logs.
1185 self._extract_mcu_logs(log_dir)
1186
Garry Wang79e9af62019-06-12 15:19:19 -07001187 def _lock(self):
1188 """lock servohost by touching a file.
1189 """
1190 logging.debug('Locking servohost %s by touching %s file',
1191 self.hostname, self._lock_file)
1192 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001193 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001194
1195
1196 def _unlock(self):
1197 """Unlock servohost by removing the lock file.
1198 """
1199 logging.debug('Unlocking servohost by removing %s file',
1200 self._lock_file)
1201 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001202 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001203
1204
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001205 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001206 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001207 # NOTE: throughout this method there are multiple attempts to catch
1208 # all errors. This is WAI as log grabbing should not fail tests.
1209 # However, the goal is to catch and handle/process all errors, thus
1210 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001211 if self._closed:
1212 logging.debug('ServoHost is already closed.')
1213 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001214 instance_ts = self.get_instance_logs_ts()
1215 # TODO(crbug.com/1011516): once enabled, remove the check against
1216 # localhost and instead check against log-rotiation enablement.
1217 logs_available = (instance_ts is not None and
1218 self.job and
1219 not self.is_localhost())
1220 if logs_available:
1221 # Probe whether there was a servod restart, and grab those old
1222 # logs as well.
1223 try:
1224 self.probe_servod_restart(instance_ts, self.job.resultdir)
1225 except (error.AutoservRunError, error.TestFail) as e:
1226 logging.info('Failed to grab servo logs due to: %s. '
1227 'This error is forgiven.', str(e))
1228 except Exception as e:
1229 logging.error('Unexpected error probing for old logs. %s. '
1230 'Forgiven. Please file a bug and fix or catch '
1231 'in log probing function', str(e),
1232 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001233 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001234 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001235 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001236 self._servo.close(outdir)
1237
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001238 if logs_available:
1239 # Grab current (not old like above) logs after the servo instance
1240 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001241 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001242 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001243 except error.AutoservRunError as e:
1244 logging.info('Failed to grab servo logs due to: %s. '
1245 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001246 except Exception as e:
1247 logging.error('Unexpected error grabbing servod logs. %s. '
1248 'Forgiven. Please file a bug and fix or catch '
1249 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001250
Garry Wang7c00b0f2019-06-25 17:28:17 -07001251 if self._is_locked:
1252 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001253 try:
1254 self._unlock()
1255 except error.AutoservSSHTimeout:
1256 logging.error('Unlock servohost failed due to ssh timeout.'
1257 ' It may caused by servohost went down during'
1258 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001259 # We want always stop servod after task to minimum the impact of bad
1260 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001261 try:
1262 self.stop_servod()
1263 except error.AutoservRunError as e:
1264 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001265 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001266
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001267 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001268 # Mark closed.
1269 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001270
1271
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001272 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001273 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001274
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001275 def _get_host_metrics_data(self):
1276 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001277 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001278 'board': self.servo_board or ''}
1279
1280 def _is_servo_device_connected(self, servo_type, serial):
1281 """Check if device is connected to the labstation.
1282
1283 Works for all servo devices connected to the labstation.
1284 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1285
1286 @param servo_type: The type of servo device. Expecting value can be
1287 servo_v4 or servo_micro.
1288 @param serial: The serial number of the device to detect it.
1289 """
1290 vid_pid = self.SERVO_VID_PID.get(servo_type)
1291 if not vid_pid or not serial:
1292 # device cannot detected without VID/PID or serial number
1293 return False
1294 logging.debug('Started to detect %s', servo_type)
1295 try:
1296 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1297 result = self.run(cmd, ignore_status=True, timeout=30)
1298 if result.exit_status == 0 and result.stdout.strip():
1299 logging.debug('The %s is plugged in to the host.', servo_type)
1300 return True
1301 logging.debug('%s device is not detected; %s', servo_type, result)
1302 return False
1303 except Exception as e:
1304 # can be triggered by timeout issue due running the script
1305 metrics.Counter(
1306 'chromeos/autotest/repair/servo_detection/timeout'
1307 ).increment(fields=self._get_host_metrics_data())
1308 logging.error('%s device is not detected; %s', servo_type, str(e))
1309 return None
1310
1311 def _is_servo_board_present_on_servo_v3(self):
1312 """Check if servo board is detected on servo_v3"""
1313 vid_pids = self.SERVO_VID_PID['servo_v3']
1314 if not vid_pids or len(vid_pids) == 0:
1315 # device cannot detected without VID/PID
1316 return False
1317 logging.debug('Started to detect servo board on servo_v3')
1318 not_detected = 'The servo board is not detected on servo_v3'
1319 try:
1320 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1321 result = self.run(cmd, ignore_status=True, timeout=30)
1322 if result.exit_status == 0 and result.stdout.strip():
1323 logging.debug('The servo board is detected on servo_v3')
1324 return True
1325 logging.debug('%s; %s', not_detected, result)
1326 return False
1327 except Exception as e:
1328 # can be triggered by timeout issue due running the script
1329 metrics.Counter(
1330 'chromeos/autotest/repair/servo_detection/timeout'
1331 ).increment(fields=self._get_host_metrics_data())
1332 logging.error('%s; %s', not_detected, str(e))
1333 return None
1334
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001335 def _is_main_device_not_detected_on_servo_v4(self):
1336 """Check if servod cannot find main device on servo.
1337
1338 The check based on reading servod logs for servo_v4.
1339 """
1340 if not self._initial_instance_ts:
1341 # latest log not found
1342 return False
1343 logging.debug('latest log for servod created at %s',
1344 self._initial_instance_ts)
1345 try:
1346 log_created = calendar.timegm(time.strptime(
1347 self._initial_instance_ts,
1348 "%Y-%m-%d--%H-%M-%S.%f"))
1349 except ValueError as e:
1350 logging.debug('Cannot read time from log file name: %s',
1351 self._initial_instance_ts)
1352 return False
1353 min_time_created = calendar.timegm(time.gmtime())
1354 if min_time_created > log_created + 3600:
1355 # the log file is old we cannot use it
1356 logging.debug('log file was created more than hour ago, too old')
1357 return False
1358 logging.debug('latest log was created not longer then 1 hour ago')
1359
1360 # check if servod can detect main device by servo_v4
1361 message = 'ERROR - No servo micro or CCD detected for board'
1362 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1363 % (self.servo_port, self._initial_instance_ts, message))
1364 result = self.run(cmd, ignore_status=True)
1365 if result.stdout.strip():
1366 logging.info('Servod cannot detect main device on the servo; '
1367 'Can be caused by bad hardware of servo or '
1368 'issue on the DUT side.')
1369 return True
1370 logging.debug('The main device is detected')
1371 return False
1372
Otabek Kasimov15963492020-06-23 21:10:51 -07001373 def get_verify_state(self, tag):
1374 """Return the state of servo verifier.
1375
1376 @returns: bool or None
1377 """
1378 return self._repair_strategy.verifier_is_good(tag)
1379
1380 def determine_servo_state(self):
1381 """Determine servo state based on the failed verifier.
1382
1383 @returns: servo state value
1384 The state detecting based on first fail verifier or collecting of
1385 them.
1386 """
1387 ssh = self.get_verify_state('servo_ssh')
1388 disk_space = self.get_verify_state('disk_space')
1389 start_servod = self.get_verify_state('servod_job')
1390 create_servo = self.get_verify_state('servod_connection')
1391 init_servo = self.get_verify_state('servod_control')
1392 pwr_button = self.get_verify_state('pwr_button')
1393 lid_open = self.get_verify_state('lid_open')
1394 ec_board = self.get_verify_state('ec_board')
1395 ccd_testlab = self.get_verify_state('ccd_testlab')
1396
1397 if not ssh:
1398 return servo_constants.SERVO_STATE_NO_SSH
1399
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001400 if (start_servod == self.VERIFY_FAILED
1401 or create_servo == self.VERIFY_FAILED):
1402 # sometimes servo can start with out present servo
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001403 if self.is_labstation():
1404 if not self.servo_serial:
1405 return servo_constants.SERVO_STATE_WRONG_CONFIG
1406 if self._is_servo_device_connected(
1407 'servo_v4',
1408 self.servo_serial) == False:
1409 return servo_constants.SERVO_STATE_NOT_CONNECTED
1410 elif self._is_servo_board_present_on_servo_v3() == False:
1411 return servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001412
1413 if start_servod == self.VERIFY_FAILED:
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001414 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1415
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001416 if create_servo == self.VERIFY_FAILED:
1417 if (self.is_labstation()
1418 and self._is_main_device_not_detected_on_servo_v4()):
1419 servo_type = None
1420 if self.get_dut_host_info():
1421 servo_type = self.get_dut_host_info().get_label_value(
1422 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1423 if servo_type and 'servo_micro' in servo_type:
1424 serial = self.get_servo_micro_serial_number()
1425 logging.debug('servo_micro serial: %s', serial)
1426 if self._is_servo_device_detected('servo_micro',
1427 serial):
1428 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1429 # Device can be not detected because of DUT
1430 # TODO (otabek) update after b/159755652 and b/159754985
1431 metrics.Counter(
1432 'chromeos/autotest/repair/servo_state/needs_replacement'
1433 ).increment(fields=self._get_host_metrics_data())
1434 elif not self.is_labstation():
1435 # Here need logic to check if flex cable is connected
1436 pass
1437
Otabek Kasimov15963492020-06-23 21:10:51 -07001438 # one of the reason why servo can not initialized
1439 if ccd_testlab == self.VERIFY_FAILED:
1440 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1441
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001442 if (create_servo == self.VERIFY_FAILED
1443 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001444 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1445
Otabek Kasimov015c15c2020-08-20 00:40:42 -07001446 if ec_board == self.VERIFY_FAILED:
1447 return servo_constants.SERVO_STATE_EC_BROKEN
Otabek Kasimov15963492020-06-23 21:10:51 -07001448 if pwr_button == self.VERIFY_FAILED:
1449 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1450 if lid_open == self.VERIFY_FAILED:
1451 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
Otabek Kasimov15963492020-06-23 21:10:51 -07001452
Otabek Kasimov15963492020-06-23 21:10:51 -07001453 metrics.Counter(
1454 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001455 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001456 logging.info('We do not have special state for this failure yet :)')
1457 return servo_constants.SERVO_STATE_BROKEN
1458
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001459
Richard Barnetteea3e4602016-06-10 12:36:41 -07001460def make_servo_hostname(dut_hostname):
1461 """Given a DUT's hostname, return the hostname of its servo.
1462
1463 @param dut_hostname: hostname of a DUT.
1464
1465 @return hostname of the DUT's servo.
1466
1467 """
1468 host_parts = dut_hostname.split('.')
1469 host_parts[0] = host_parts[0] + '-servo'
1470 return '.'.join(host_parts)
1471
1472
1473def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001474 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001475
1476 @param servo_hostname: hostname of the servo host.
1477
1478 @return True if it's up, False otherwise
1479 """
1480 # Technically, this duplicates the SSH ping done early in the servo
1481 # proxy initialization code. However, this ping ends in a couple
1482 # seconds when if fails, rather than the 60 seconds it takes to decide
1483 # that an SSH ping has timed out. Specifically, that timeout happens
1484 # when our servo DNS name resolves, but there is no host at that IP.
1485 logging.info('Pinging servo host at %s', servo_hostname)
1486 ping_config = ping_runner.PingConfig(
1487 servo_hostname, count=3,
1488 ignore_result=True, ignore_status=True)
1489 return ping_runner.PingRunner().ping(ping_config).received > 0
1490
1491
Richard Barnettee519dcd2016-08-15 17:37:17 -07001492def _map_afe_board_to_servo_board(afe_board):
1493 """Map a board we get from the AFE to a servo appropriate value.
1494
1495 Many boards are identical to other boards for servo's purposes.
1496 This function makes that mapping.
1497
1498 @param afe_board string board name received from AFE.
1499 @return board we expect servo to have.
1500
1501 """
1502 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1503 BOARD_MAP = {'gizmo': 'panther'}
1504 mapped_board = afe_board
1505 if afe_board in BOARD_MAP:
1506 mapped_board = BOARD_MAP[afe_board]
1507 else:
1508 for suffix in KNOWN_SUFFIXES:
1509 if afe_board.endswith(suffix):
1510 mapped_board = afe_board[0:-len(suffix)]
1511 break
1512 if mapped_board != afe_board:
1513 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1514 return mapped_board
1515
1516
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001517def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001518 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001519
Richard Barnetteea3e4602016-06-10 12:36:41 -07001520 @param dut_host Instance of `Host` on which to find the servo
1521 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001522 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001523 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001524 info = dut_host.host_info_store.get()
Derek Beckettf73baca2020-08-19 15:08:47 -07001525 servo_args = {k: v for k, v in six.iteritems(info.attributes)
Garry Wang11b5e872020-03-11 15:14:08 -07001526 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001527
Garry Wang11b5e872020-03-11 15:14:08 -07001528 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001529 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001530 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1531 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001532 except ValueError:
1533 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001534 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001535 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001536 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001537
1538 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001539 servo_board = _map_afe_board_to_servo_board(info.board)
1540 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001541 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001542 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1543 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001544
1545
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001546def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001547 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1548 in ['localhost', '127.0.0.1']):
1549 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001550 'SSP', 'host_container_ip', type=str, default=None)
1551
1552
Dan Shi023aae32016-05-25 11:13:01 -07001553def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001554 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001555 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001556
Richard Barnette9a26ad62016-06-10 12:03:08 -07001557 This function attempts to create and verify or repair a `ServoHost`
1558 object for a servo connected to the given `dut`, subject to various
1559 constraints imposed by the parameters:
1560 * When the `servo_args` parameter is not `None`, a servo
1561 host must be created, and must be checked with `repair()`.
1562 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1563 true:
1564 * If `try_servo_repair` is true, then create a servo host and
1565 check it with `repair()`.
1566 * Otherwise, if the servo responds to `ping` then create a
1567 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001568
Richard Barnette9a26ad62016-06-10 12:03:08 -07001569 In cases where `servo_args` was not `None`, repair failure
1570 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001571 are logged and then discarded. Note that this only happens in cases
1572 where we're called from a test (not special task) control file that
1573 has an explicit dependency on servo. In that case, we require that
1574 repair not write to `status.log`, so as to avoid polluting test
1575 results.
1576
1577 TODO(jrbarnette): The special handling for servo in test control
1578 files is a thorn in my flesh; I dearly hope to see it cut out before
1579 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001580
1581 Parameters for a servo host consist of a host name, port number, and
1582 DUT board, and are determined from one of these sources, in order of
1583 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001584 * Servo attributes from the `dut` parameter take precedence over
1585 all other sources of information.
1586 * If a DNS entry for the servo based on the DUT hostname exists in
1587 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001588 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001589 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001590 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001591
1592 @param dut An instance of `Host` from which to take
1593 servo parameters (if available).
1594 @param servo_args A dictionary with servo parameters to use if
1595 they can't be found from `dut`. If this
1596 argument is supplied, unrepaired exceptions
1597 from `verify()` will be passed back to the
1598 caller.
1599 @param try_lab_servo If not true, servo host creation will be
1600 skipped unless otherwise required by the
1601 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001602 @param try_servo_repair If true, check a servo host with
1603 `repair()` instead of `verify()`.
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001604 @param dut_host_info: A HostInfo object of the DUT that connected
1605 to this servo.
Dan Shi4d478522014-02-14 13:46:32 -08001606
1607 @returns: A ServoHost object or None. See comments above.
1608
1609 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001610 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001611 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001612 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001613 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001614 if utils.in_moblab_ssp():
1615 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001616 logging.debug(
1617 'Overriding provided servo_args (%s) with arguments'
1618 ' determined from the host (%s)',
1619 servo_args,
1620 servo_args_override,
1621 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001622 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001623
Richard Barnetteea3e4602016-06-10 12:36:41 -07001624 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001625 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001626 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001627 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001628 else:
1629 # For regular test case which not required the servo
1630 return None, None
1631
Garry Wang11b5e872020-03-11 15:14:08 -07001632 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1633 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001634 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1635 logging.debug(
1636 'Servo connection info missed hostname: %s , port: %s',
1637 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001638 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001639 if not is_servo_host_information_valid(servo_hostname, servo_port):
1640 logging.debug(
1641 'Servo connection info is incorrect hostname: %s , port: %s',
1642 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001643 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001644 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001645 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001646 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001647 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001648
Garry Wangebc015b2019-06-06 17:45:06 -07001649 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001650
Otabek Kasimove6df8102020-07-21 20:15:25 -07001651 # Reset or reboot servo device only during AdminRepair tasks.
1652 if try_servo_repair:
1653 if newhost._is_locked:
1654 # Reset servo if the servo is locked, as we check if the servohost
1655 # is up, if the servohost is labstation and if the servohost is in
1656 # lab inside the locking logic.
Otabek Kasimov545739c2020-08-20 00:24:21 -07001657 newhost.reset_usbc_pigtail_connection_on_need()
Otabek Kasimove6df8102020-07-21 20:15:25 -07001658 newhost.reset_servo()
1659 else:
Garry Wang358aad42020-08-02 20:56:04 -07001660 try:
1661 newhost.reboot_servo_v3_on_need()
Garry Wang1f0d5332020-08-10 19:32:32 -07001662 except Exception as e:
1663 logging.info('[Non-critical] Unexpected error while trying to'
1664 ' reboot servo_v3, skipping the reboot; %s', e)
Otabek Kasimove6df8102020-07-21 20:15:25 -07001665
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001666 if dut:
1667 newhost.set_dut_hostname(dut.hostname)
Otabek Kasimov9e90ae12020-08-14 03:01:19 -07001668 if dut_host_info:
1669 newhost.set_dut_host_info(dut_host_info)
Garry Wangffbd2162020-04-17 16:13:48 -07001670
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001671 if try_lab_servo or try_servo_repair:
1672 try:
1673 logging.info("Check and update servo firmware.")
1674 servo_updater.update_servo_firmware(
1675 newhost,
1676 force_update=False)
1677 except Exception as e:
1678 logging.error("Servo device update error: %s", e)
1679
Garry Wangcdd27b22020-01-13 14:59:11 -08001680 try:
1681 newhost.restart_servod(quick_startup=True)
1682 except error.AutoservSSHTimeout:
1683 logging.warning("Restart servod failed due ssh connection "
1684 "to servohost timed out. This error is forgiven"
1685 " here, we will retry in servo repair process.")
1686 except error.AutoservRunError as e:
1687 logging.warning("Restart servod failed due to:\n%s\n"
1688 "This error is forgiven here, we will retry"
1689 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001690
Richard Barnette9a26ad62016-06-10 12:03:08 -07001691 # Note that the logic of repair() includes everything done
1692 # by verify(). It's sufficient to call one or the other;
1693 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001694 if servo_dependency:
1695 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001696 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001697
1698 if try_servo_repair:
1699 try:
1700 newhost.repair()
1701 except Exception:
1702 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001703 else:
1704 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001705 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001706 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001707 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001708 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001709
1710
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001711def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001712 if hostname is None or len(hostname.strip()) == 0:
1713 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001714 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001715 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001716 if not type(port) is int:
1717 try:
1718 int(port)
1719 except ValueError:
1720 return False
1721
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001722 return True
1723
1724
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001725def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001726 """Check if provided servo attributes are valid.
1727
1728 @param hostname Hostname of the servohost.
1729 @param port servo port number.
1730
1731 @returns: A bool value to indicate if provided servo attribute valid.
1732 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001733 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001734 return False
1735 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001736 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001737 if port_int < 1 or port_int > 65000:
1738 return False
1739 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001740 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001741 return False
1742 return True