blob: 741640e7e943effbbc40b35a14846318a2001211 [file] [log] [blame]
Derek Beckettf73baca2020-08-19 15:08:47 -07001# Lint as: python2, python3
Fang Deng5d518f42013-08-02 14:04:32 -07002# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5#
6# Expects to be run in an environment with sudo and no interactive password
7# prompt, such as within the Chromium OS development chroot.
8
9
10"""This file provides core logic for servo verify/repair process."""
11
12
Derek Beckettf73baca2020-08-19 15:08:47 -070013from __future__ import absolute_import
14from __future__ import division
15from __future__ import print_function
16
Fang Deng5d518f42013-08-02 14:04:32 -070017import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060018import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080019import re
20import tarfile
Andrew McRaef0679932020-08-13 09:15:23 +100021import threading
Garry Wang2b5eef92020-08-21 16:23:35 -070022import json
Garry Wangc1288cf2019-12-17 14:58:00 -080023import time
Derek Beckettf73baca2020-08-19 15:08:47 -070024import six
25import six.moves.xmlrpc_client
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070026import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070027
28from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070029from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070030from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070031from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070032from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070033from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070034from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000035from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070036from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070037from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070038from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070039from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070040from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080041
# Prefer the real metrics module from chromite; if that import fails, use the
# stand-in object exposed as utils.metrics_mock so later references to
# `metrics` still resolve.
try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock

# Shorthand for the shared global_config.global_config object.
_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070048
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070049
Garry Wangebc015b2019-06-06 17:45:06 -070050class ServoHost(base_servohost.BaseServoHost):
51 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070052 that with a servo instance for a specific port.
53
54 @type _servo: servo.Servo | None
55 """
Fang Deng5d518f42013-08-02 14:04:32 -070056
# Port servod is expected to listen on: taken from the SERVOD_PORT
# environment variable when it is set, otherwise 9999.
DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))

# Timeout for initializing servo signals.
INITIALIZE_SERVO_TIMEOUT_SECS = 60

# Default timeout for run terminal command.
DEFAULT_TERMINAL_TIMEOUT = 30

# Ready test function
SERVO_READY_METHOD = 'get_version'

# Directory prefix on the servo host where the servod logs are stored.
SERVOD_LOG_PREFIX = '/var/log/servod'

# Exit code to use when symlinks for servod logs are not found.
NO_SYMLINKS_CODE = 9

# Directory in the job's results directory to dump the logs into.
LOG_DIR = 'servod'

# Prefix for joint loglevel files in the logs.
JOINT_LOG_PREFIX = 'log'

# Regex group to extract timestamp from logfile name.
TS_GROUP = 'ts'

# This regex is used to extract the timestamp from servod logs.
# Files always start with "log." followed by the timestamp.
# Every literal dot is escaped so that it only matches a real '.', not an
# arbitrary character. The adjacent string literals are joined first, so the
# trailing `% TS_GROUP` fills in the single %s of the named group.
TS_RE = (r'log\.'
         # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
         r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}\.\d{3})'
         # The loglevel is optional depending on labstation version.
         r'(\.(INFO|DEBUG|WARNING))?' % TS_GROUP)
TS_EXTRACTOR = re.compile(TS_RE)

# Regex group to extract MCU name from logline in servod logs.
MCU_GROUP = 'mcu'

# Regex group to extract logline from MCU logline in servod logs.
LINE_GROUP = 'line'

# This regex is used to extract the mcu and the line content from an
# MCU logline in servod logs. e.g. EC or servo_v4 console logs.
# Here is an example log-line:
#
# 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
# console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
#
# Here is conceptually how they are formatted:
#
#  <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
#  <output>
#
# The log format starts with a timestamp
MCU_RE = (r'[\d\-]+ [\d:,]+ '
          # The mcu that is logging this is next.
          r'- (?P<%s>\w+) - '
          # Next, we have more log outputs before the actual line.
          # Information about the file line, logging function etc.
          # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
          # NOTE: if the log format changes, this regex needs to be
          # adjusted.
          r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
          # Lastly, we get the MCU's console line.
          r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
MCU_EXTRACTOR = re.compile(MCU_RE)

# Regex to detect timeout messages when USBC pigtail has timeout issue.
# e.g.:  [475635.427072 PD TMOUT RX 1/1]
USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'

# Suffix to identify compressed logfiles.
COMPRESSION_SUFFIX = '.tbz2'

# A suffix to mark servod log directories that came from instance that
# ran during this servo_host, but are not the last one running e.g. when
# an instance (on purpose, or due to a bug) restarted in the middle of the
# run.
OLD_LOG_SUFFIX = 'old'

# Mapping servo board with their vid-pid.
# NOTE: most values are a single 'vid:pid' string, but servo_v3 maps to a
# list of two such strings; readers of this table must handle both shapes.
SERVO_VID_PID = {
    'servo_v4':'18d1:501b',
    'ccd_cr50':'18d1:5014',
    'servo_micro':'18d1:501a',
    'servo_v3':['18d1:5004', '0403:6014'],
}

# States of verifiers
# True - verifier run and passed
# False - verifier run and failed
# None - verifier did not run or dependency failed
VERIFY_SUCCESS = True
VERIFY_FAILED = False
VERIFY_NOT_RUN = None
152
def _init_attributes(self):
    """Reset every per-instance attribute to its initial, empty value."""
    # Description of the servo this host talks to; all unknown at this point.
    self._servo_state = None
    self.servo_port = None
    self.servo_board = None
    self.servo_model = None
    self.servo_serial = None

    # The flag that indicate if a servo is connected to a smart usbhub.
    # TODO(xianuowang@) remove this flag once all usbhubs in the lab
    # get replaced.
    self.smart_usbhub = None

    # Connection objects; created later on demand.
    self._servo = None
    self._tunnel_proxy = None
    # Lock held while the tunnel proxy is being created.
    self._tunnel_proxy_lock = threading.Lock()

    self._initial_instance_ts = None

    # Flag to make sure that multiple calls to close do not result in the
    # logic executing multiple times.
    self._closed = False

    # Per-thread local data
    self._local = threading.local()
Fang Deng5d518f42013-08-02 14:04:32 -0700172
def _initialize(self, servo_host='localhost',
                servo_port=DEFAULT_PORT, servo_board=None,
                servo_model=None, servo_serial=None, is_in_lab=None,
                *args, **dargs):
    """Initialize a ServoHost instance.

    A ServoHost instance represents a host that controls a servo.

    @param servo_host: Name of the host where the servod process
                       is running.
    @param servo_port: Port the servod process is listening on. Defaults
                       to the SERVOD_PORT environment variable if set,
                       otherwise 9999. Converted with int() before use.
    @param servo_board: Board that the servo is connected to.
    @param servo_model: Model that the servo is connected to.
    @param servo_serial: Serial of the servo; stored unchanged on the
                         instance.
    @param is_in_lab: True if the servo host is in Cros Lab. Default is set
                      to None, for which utils.host_is_in_lab_zone will be
                      called to check if the servo host is in Cros lab.
    @param args: Extra positional arguments, forwarded to the parent
                 class' _initialize().
    @param dargs: Extra keyword arguments, forwarded to the parent
                  class' _initialize().

    """
    super(ServoHost, self)._initialize(hostname=servo_host,
                                       is_in_lab=is_in_lab, *args, **dargs)
    # Start from a clean slate, then fill in what the caller supplied.
    self._init_attributes()
    self.servo_port = int(servo_port)
    self.servo_board = servo_board
    self.servo_model = servo_model
    self.servo_serial = servo_serial

    # The location of the log files on the servo host for this instance.
    self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
                                     self.servo_port)
    # Path of the servo host lock file.
    self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
                       self.LOCK_FILE_POSTFIX)
    # File path to declare a reboot request.
    self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port) +
                         self.REBOOT_FILE_POSTFIX)

    # Lock the servo host if it's an in-lab labstation to prevent other
    # task to reboot it until current task completes. We also wait and
    # make sure the labstation is up here, in the case of the labstation is
    # in the middle of reboot.
    self._is_locked = False
    # wait_up() is evaluated first, so the wait happens even when the host
    # later turns out not to be an in-lab labstation.
    if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
        and self.is_labstation()):
        self._lock()

    self._repair_strategy = (
            servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700222
def __str__(self):
    """Return a short label: <ClassName 'hostname:servo_port'>."""
    class_name = type(self).__name__
    return "<%s '%s:%s'>" % (class_name, self.hostname, self.servo_port)
226
def connect_servo(self):
    """Create the servo object, then prepare the DUT through it.

    Runs initilize_servo() followed by initialize_dut_for_servo(); any
    exception raised by either step propagates to the caller.
    """
    # Step 1: build the servo.Servo wrapper for this host.
    self.initilize_servo()
    # Step 2: set up DUT control and check the servo<->DUT connection.
    self.initialize_dut_for_servo()
232
def initilize_servo(self):
    """Create the servo.Servo object for this host and cache it.

    The new object is built from this host and its servo serial and is
    stored in `self._servo`. Per the original notes, constructing it
    verifies the network connections and creates an ssh tunnel when one is
    required.
    """
    servo_object = servo.Servo(servo_host=self,
                               servo_serial=self.servo_serial)
    self._servo = servo_object
Richard Barnette9a26ad62016-06-10 12:03:08 -0700242
def initialize_dut_for_servo(self):
    """Run the servo's DUT initialization under a time limit.

    This does some setup for dut control, e.g. setup main servo_v4 device,
    and also tests the connection between servo and DUT. As a side effect
    of testing the connection, all signals on the target servo are reset to
    default values, and the USB stick is set to the neutral (off) position.

    @raises AutoservVerifyError if no servo object has been created yet,
            or if initialization does not finish within
            INITIALIZE_SERVO_TIMEOUT_SECS seconds.
    """
    if not self._servo:
        raise hosts.AutoservVerifyError('Servo object needs to be'
                                        ' initialized before initialize'
                                        ' DUT.')

    # retry.timeout() hands back a (timed_out, result) pair; only the
    # timed_out flag matters here.
    outcome = retry.timeout(
            self._servo.initialize_dut,
            timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
    timed_out, _ = outcome
    if timed_out:
        raise hosts.AutoservVerifyError('Initialize dut for servo timed'
                                        ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700260
def disconnect_servo(self):
    """Drop the cached servo object, if there is one.

    When a servo was previously connected, any ssh tunnel registered for
    the servo port is torn down and `self._servo` is reset to `None`.
    Without a cached servo this is a no-op.
    """
    if not self._servo:
        return

    # N.B. This call is safe even without a tunnel:
    # rpc_server_tracker.disconnect() silently ignores
    # unknown ports.
    tracker = self.rpc_server_tracker
    tracker.disconnect(self.servo_port)
    self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700274
def _maybe_create_servod_ssh_tunnel_proxy(self):
    """Create the xmlrpc proxy used over an ssh tunnel, at most once.

    The existence check and the creation both happen while holding
    `self._tunnel_proxy_lock`, so only one proxy is ever stored.
    """
    with self._tunnel_proxy_lock:
        if self._tunnel_proxy is not None:
            # A proxy already exists; nothing to do.
            return
        connect = self.rpc_server_tracker.xmlrpc_connect
        self._tunnel_proxy = connect(
                None,
                self.servo_port,
                ready_test_name=self.SERVO_READY_METHOD,
                timeout_seconds=60,
                request_timeout_seconds=3600,
                server_desc=str(self))
288
def get_servod_server_proxy(self):
    """Return the proxy for talking to servod, creating it if needed.

    Two kinds of proxy exist: one shared proxy that goes through an ssh
    tunnel, and plain http proxies that are created once per thread.

    @returns: An xmlrpclib.ServerProxy that is connected to the servod
              server on the host.
    """
    use_ssh_tunnel = (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
                      and not self.is_localhost())
    if use_ssh_tunnel:
        # Unlocked check first; the creation helper checks again while
        # holding the lock.
        if self._tunnel_proxy is None:
            self._maybe_create_servod_ssh_tunnel_proxy()
        return self._tunnel_proxy

    # xmlrpc/httplib is not thread-safe, so each thread must have its
    # own separate proxy connection.
    thread_state = self._local
    if not hasattr(thread_state, "_per_thread_proxy"):
        url = 'http://%s:%s' % (self.hostname, self.servo_port)
        thread_state._per_thread_proxy = (
                six.moves.xmlrpc_client.ServerProxy(url))
    return thread_state._per_thread_proxy
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800310
def verify(self, silent=False):
    """Run the repair strategy's verifiers and record the servo state.

    On success the servo state becomes SERVO_STATE_WORKING. On failure the
    state is re-determined (unless the servo host is localhost) and the
    exception is re-raised only when it is classified as critical.

    @param silent   If true, suppress logging in `status.log`.
    """
    start_note = ('Beginning verify for servo host %s port %s serial %s'
                  % (self.hostname, self.servo_port, self.servo_serial))
    self.record('INFO', None, None, start_note)
    try:
        self._repair_strategy.verify(self, silent)
        self._servo_state = servo_constants.SERVO_STATE_WORKING
        self.record('INFO', None, None,
                    'ServoHost verify set servo_state as WORKING')
    except Exception as e:
        on_remote_host = not self.is_localhost()
        if on_remote_host:
            self._servo_state = self.determine_servo_state()
            self.record('INFO', None, None,
                        'ServoHost verify set servo_state as %s'
                        % self._servo_state)
        # Non-critical failures are swallowed here on purpose.
        if self._is_critical_error(e):
            raise
Fang Deng5d518f42013-08-02 14:04:32 -0700332
def _get_default_usbkey_mount_path(self):
    """Return the usbkey mount point for this servo port."""
    mount_template = '/media/servo_usb/%s'
    return mount_template % self.servo_port
Fang Deng5d518f42013-08-02 14:04:32 -0700335
def get_image_name_from_usbkey(self, usbkey_dev):
    """Mount the usb drive and report which ChromeOS image is on it.

    This method assumes the image_usbkey_direction is already set to servo
    side. The drive is always unmounted again before returning.

    @param: usbkey_dev  usbkey dev path(e.g. /dev/sdb).

    @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
              or empty string if no test image detected, or unexpected
              error occurred.
    """
    logging.info('Checking ChromeOS image name on usbkey.')
    mount_point = self._get_default_usbkey_mount_path()
    # Unmount if there is an existing stale mount.
    self._unmount_drive(mount_point)
    # ChromeOS root fs is in /dev/sdx3
    rootfs_partition = usbkey_dev + '3'
    try:
        mounted = self._mount_drive(rootfs_partition, mount_point)
        if not mounted:
            logging.debug('Unexpected error occurred on mount usb drive.')
            return ''

        cat_result = self.run('cat %s/etc/lsb-release' % mount_point,
                              ignore_status=True)
        lsb_release = cat_result.stdout.strip()

        # Only images whose release track mentions "test" are accepted.
        if re.search(r'RELEASE_TRACK=.*test', lsb_release):
            return lsbrelease_utils.get_chromeos_release_builder_path(
                    lsb_release_content=lsb_release)

        logging.info('The image on usbkey is not a test image')
        return ''
    finally:
        logging.debug('Image check compeleted, unmounting the usb drive.')
        self._unmount_drive(mount_point)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700371
def _extract_firmware_image_from_usbkey(self, fw_dst):
    """Extract firmware images from the usbkey on servo.

    This method assumes there is already a ChromeOS test image staged on
    servo. The root fs partition of the usbkey is mounted, the firmware
    updater found on it is asked to unpack into fw_dst and to print its
    manifest, and the partition is unmounted again in every case.

    @param: fw_dst  the path that we'll copy firmware images to.

    @returns: a json format string of firmware manifest data.
    @raises: AutoservRepairError if the usbkey dev path could not be
             probed or the usbkey could not be mounted. Errors raised by
             the probe itself or by the updater commands propagate.
    """
    usbkey_dev = self._probe_and_validate_usb_dev()
    if not usbkey_dev:
        # Pass a short tag as the second argument, like every other
        # AutoservRepairError raised in this file.
        raise hosts.AutoservRepairError('Unexpected error occurred when'
              ' probe usbkey dev path, please check logs for detail.',
              'failed to probe usbkey')

    mount_dst = self._get_default_usbkey_mount_path()
    # Unmount if there is an existing stale mount.
    self._unmount_drive(mount_dst)
    # ChromeOS root fs is in /dev/sdx3
    mount_src = usbkey_dev + '3'
    try:
        if not self._mount_drive(mount_src, mount_dst):
            raise hosts.AutoservRepairError('Failed to extract firmware'
                  ' image; Unable to mount %s.' % usbkey_dev,
                  'unable to mount usbkey')
        updater_bin = os.path.join(mount_dst,
                                   'usr/sbin/chromeos-firmwareupdate')
        self.run('%s --unpack %s' % (updater_bin, fw_dst))
        return self.run('%s --manifest' % updater_bin).stdout
    finally:
        # Never leave the usbkey mounted, whether extraction worked or not.
        self._unmount_drive(mount_dst)
401
def prepare_repair_firmware_image(self, fw_dst=None):
    """Stage firmware images on the servohost for the auto repair process.

    The destination directory is wiped and re-created, the firmware is
    extracted from the usbkey into it, and the image paths for the DUT's
    model are looked up in the firmware manifest.

    @param: fw_dst  the path that we want to store firmware image on
                    the servohost. Defaults to a per-port directory under
                    /tmp/firmware_image.

    @returns: A tuple (ec image path, bios image path) on the servohost;
              an entry is None when the manifest has no image of that
              type for the dut's model.
    @raises: AutoservRepairError if the model is unknown, the manifest has
             no entry for the model, or neither image is available.
    """
    model = self.servo_model or self._dut_host_info.model
    if not model:
        raise hosts.AutoservRepairError(
                'Could not determine DUT\'s model.',
                'model infomation unknown')

    if not fw_dst:
        fw_dst = '/tmp/firmware_image/%s' % self.servo_port
    # Cleanup and re-create dst path to have a fresh start.
    for shell_template in ('rm -rf %s', 'mkdir -p %s'):
        self.run(shell_template % fw_dst)

    manifest_text = self._extract_firmware_image_from_usbkey(fw_dst)
    model_manifest = json.loads(manifest_text).get(model)
    if not model_manifest:
        raise hosts.AutoservRepairError('Could not find firmware manifest'
                  ' for model:%s' % model, 'model manifest not found')

    def _image_path(section):
        """Return the image path for a manifest section, or None."""
        try:
            return os.path.join(fw_dst, model_manifest[section]['image'])
        except KeyError:
            return None

    ec_image = _image_path('ec')
    bios_image = _image_path('host')
    if not ec_image and not bios_image:
        raise hosts.AutoservRepairError('Could not find any firmware image'
                  ' for model:%s' % model, 'cannot find firmware image')
    return ec_image, bios_image
Garry Wang7b0e1b72020-03-25 19:08:59 -0700442
def _probe_and_validate_usb_dev(self):
    """Ask servo for the usbkey dev path and confirm fdisk lists it.

    Possible outcomes:
    1. An error happens while probing, or the fdisk check times out:
       the empty string is returned so callers are not blocked.
    2. Probing completes but reports no usb dev: AutoservRepairError.
    3. Probing reports a dev path that fdisk does not list:
       AutoservRepairError.

    @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
              if unexpected error occurred during probe.
    @raises: AutoservRepairError if servo couldn't probe the usb dev path
             (servo.probe_host_usb_dev() returns empty string), or the dev
             path is not valid block device to servohost.
    """
    logging.info('Validating image usbkey on servo.')
    try:
        probed_dev = self._servo.probe_host_usb_dev()
    except Exception as probe_error:
        # We don't want any unexpected or transient servo communicating
        # failure block usb repair, so capture all errors here.
        logging.error(probe_error, exc_info=True)
        logging.error('Unexpected error occurred on get usbkey dev path,'
                      ' skipping usbkey validation.')
        return ''

    if probed_dev:
        # probe_host_usb_dev() sometimes return stale record,
        # so we need to make sure the path exists in fdisk.
        fdisk_check = 'fdisk -l | grep %s' % probed_dev
        try:
            outcome = self.run(fdisk_check, ignore_status=True, timeout=60)
            if outcome.exit_status == 0:
                return probed_dev

            logging.error('%s is reported from "image_usbkey_dev" control'
                          ' but not detected by fdisk!', probed_dev)
        except error.AutoservRunError as run_error:
            # Only a timeout is tolerated; anything else is re-raised.
            if 'Timeout encountered' not in str(run_error):
                raise
            logging.warning('Timeout encountered during fdisk run,'
                            ' skipping usbkey validation.')
            return ''

    raise hosts.AutoservRepairError(
              'No usbkey detected on servo, the usbkey may be either missing'
              ' or broken. Please replace usbkey on the servo and retry.',
              'missing usbkey')
492
def is_ec_supported(self):
    """Check if ec is supported on the servo_board.

    @returns: the `chrome_ec` value of the firmware config built for this
              board and model; False when no servo_board is set or when
              reading the config raises.
    """
    if not self.servo_board:
        return False

    try:
        firmware_config = config.Config(self.servo_board, self.servo_model)
        return firmware_config.chrome_ec
    except Exception as e:
        # Best effort: a broken or missing config is reported, not raised.
        logging.error('Unexpected error when read from firmware'
                      ' configs; %s', str(e))
    return False
503
def validate_image_usbkey(self):
    """Check the recovery usbkey and report the image already on it.

    First validates that a usbkey on servo is accessible to the servohost,
    then reads the name of the ChromeOS image on it, so that an unnecessary
    download and flash to the usbkey can be avoided.

    Error handling is deliberately split:
    1. If an unexpected error happens, the empty string is returned so
       repair actions are not blocked.
    2. If there was no error but no working usbkey is present on servo,
       AutoservRepairError is raised.

    @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
              or empty string if no test image detected, or unexpected
              error occurred.
    @raises:  AutoservRepairError if the usbkey is not detected on servo.
    """
    usbkey_path = self._probe_and_validate_usb_dev()
    if not usbkey_path:
        return ''
    return self.get_image_name_from_usbkey(usbkey_path)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700526
def repair(self, silent=False):
    """Attempt to repair servo host.

    On success the servo state becomes SERVO_STATE_WORKING and, for a
    labstation, any pending reboot request from this servo is withdrawn.
    On failure the state is re-determined (unless the servo host is
    localhost); a critical failure additionally disconnects the servo,
    stops servod and re-raises.

    @param silent   If true, suppress logging in `status.log`.
    """
    start_note = ('Beginning repair for servo host %s port %s serial %s'
                  % (self.hostname, self.servo_port, self.servo_serial))
    self.record('INFO', None, None, start_note)
    try:
        self._repair_strategy.repair(self, silent)
        self._servo_state = servo_constants.SERVO_STATE_WORKING
        self.record('INFO', None, None,
                    'ServoHost repair set servo_state as WORKING')
        # If target is a labstation then try to withdraw any existing
        # reboot request created by this servo because it passed repair.
        if self.is_labstation():
            self.withdraw_reboot_request()
    except Exception as e:
        on_remote_host = not self.is_localhost()
        if on_remote_host:
            self._servo_state = self.determine_servo_state()
            self.record('INFO', None, None,
                        'ServoHost repair set servo_state as %s'
                        % self._servo_state)
        if self._is_critical_error(e):
            # Tear everything down before propagating the failure.
            self.disconnect_servo()
            self.stop_servod()
            raise
554
def _is_critical_error(self, error):
    """Classify a verify/repair exception as critical or not.

    @param error: the exception raised by verify or repair.

    @returns: False only for an AutoservVerifyDependencyError whose
              is_critical() is false; True for everything else. A log
              message is emitted in both cases.
    """
    tolerable = (isinstance(error, hosts.AutoservVerifyDependencyError)
                 and not error.is_critical())
    if not tolerable:
        logging.info('Critical verify failure(s) detected during repair/verify'
                     ' servo. Disconnecting servo and stop servod, all repair '
                     'action and tests that depends on servo will not run.')
        return True

    logging.warning('Non-critical verify failure(s) detected during'
                    ' verify/repair servo, servo connection will'
                    ' still up but may not fully functional.'
                    ' Some repair actions and servo depended'
                    ' tests may not run.')
    return False
Fang Deng5d518f42013-08-02 14:04:32 -0700568
def get_servo(self):
    """Return the servo object cached on this host.

    @return: the cached servo.Servo object; None when no servo is
             currently connected.
    @rtype: autotest_lib.server.cros.servo.servo.Servo
    """
    cached_servo = self._servo
    return cached_servo
576
def request_reboot(self):
    """Flag that the servohost should be rebooted when it is safe to.

    The request is made by touching this servo's reboot flag file on the
    servohost; a failing command is ignored.
    """
    logging.debug('Request to reboot servohost %s has been created by '
                  'servo with port # %s', self.hostname, self.servo_port)
    touch_cmd = 'touch %s' % self._reboot_file
    self.run(touch_cmd, ignore_status=True)
583
def withdraw_reboot_request(self):
    """Cancel this servo's pending servohost reboot request, if any.

    The request is cancelled by removing this servo's reboot flag file;
    a failing command is ignored.
    """
    logging.debug('Withdrawing request to reboot servohost %s that created'
                  ' by servo with port # %s if exists.',
                  self.hostname, self.servo_port)
    remove_cmd = 'rm -f %s' % self._reboot_file
    self.run(remove_cmd, ignore_status=True)
592
def start_servod(self, quick_startup=False):
    """Start the servod process on servohost.

    Builds the `start servod` command from the board, model, port, serial
    and dual-v4 settings, runs it, then sleeps to give servod time to come
    up. Nothing is done when the servohost is localhost.

    @param quick_startup: when true, wait for the shorter
                          SERVOD_QUICK_STARTUP_TIMEOUT instead of
                          SERVOD_STARTUP_TIMEOUT after starting servod.
    """
    # Skip if running on the localhost.(crbug.com/1038168)
    if self.is_localhost():
        logging.debug("Servohost is a localhost, skipping start servod.")
        return

    # Each entry carries its own leading space, so joining with '' yields
    # the final command line.
    cmd_parts = ['start servod']
    if not self.servo_board:
        logging.warning('Board for DUT is unknown; starting servod'
                        ' assuming a pre-configured board.')
    else:
        cmd_parts.append(' BOARD=%s' % self.servo_board)
        # The model is only passed along together with a board.
        if self.servo_model:
            cmd_parts.append(' MODEL=%s' % self.servo_model)

    cmd_parts.append(' PORT=%d' % self.servo_port)
    if self.servo_serial:
        cmd_parts.append(' SERIAL=%s' % self.servo_serial)

    # Start servod with dual_v4 if the DUT/servo from designated pools.
    host_info = self.get_dut_host_info()
    if host_info:
        # DUAL_V4: servo setup includes servo_micro and ccd_cr50
        # connection to the DUT
        use_dual_v4 = False
        in_dual_pool = bool(host_info.pools &
                            servo_constants.POOLS_SUPPORT_DUAL_V4)
        if in_dual_pool:
            logging.debug('The DUT is detected in following designated'
                          ' pools %s,starting servod with DUAL_V4 option.',
                          servo_constants.POOLS_SUPPORT_DUAL_V4)
            use_dual_v4 = True
        elif host_info.attributes.get('servo_setup') == 'DUAL_V4':
            logging.debug('The DUT servo setup specified in config as '
                          ' "DUAL_V4"')
            use_dual_v4 = True
        if use_dual_v4:
            cmd_parts.append(' DUAL_V4=1')

    # Remove the symbolic links from the logs. This helps ensure that
    # a failed servod instantiation does not cause us to grab old logs
    # by mistake.
    self.remove_latest_log_symlinks()
    self.run(''.join(cmd_parts), timeout=60)

    # There's a lag between when `start servod` completes and when
    # the _ServodConnectionVerifier trigger can actually succeed.
    # The call to time.sleep() below gives time to make sure that
    # the trigger won't fail after we return.

    # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
    # But in the rare case all servo on a labstation are in heavy use they
    # may take ~30 seconds. So the timeout value will double these value,
    # and we'll try quick start up when first time initialize servohost,
    # and use standard start up timeout in repair.
    timeout = (servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
               if quick_startup
               else servo_constants.SERVOD_STARTUP_TIMEOUT)
    logging.debug('Wait %s seconds for servod process fully up.', timeout)
    time.sleep(timeout)
    # Cache the initial instance timestamp to check against servod restarts
    self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800657
def stop_servod(self):
    """Stop the servod process on servohost.

    Issues `stop servod` for this host's servo port (the exit status of
    the command is ignored) and then sleeps SERVOD_TEARDOWN_TIMEOUT
    seconds. Nothing is done when the servohost is the localhost.
    """
    # Skip if running on the localhost.(crbug.com/1038168)
    if self.is_localhost():
        logging.debug("Servohost is a localhost, skipping stop servod.")
        return

    logging.debug('Stopping servod on port %s', self.servo_port)
    stop_cmd = 'stop servod PORT=%d' % self.servo_port
    self.run(stop_cmd, timeout=60, ignore_status=True)

    teardown_wait = servo_constants.SERVOD_TEARDOWN_TIMEOUT
    logging.debug('Wait %s seconds for servod process fully teardown.',
                  teardown_wait)
    time.sleep(teardown_wait)
Garry Wangc1288cf2019-12-17 14:58:00 -0800672
def restart_servod(self, quick_startup=False):
    """Restart the servod process on servohost.

    Calls stop_servod() followed by start_servod().

    @param quick_startup: bool, forwarded unchanged to start_servod().
    """
    # Always tear the old instance down before bringing up a new one.
    self.stop_servod()
    self.start_servod(quick_startup)
678
def _process_servodtool_error(self, response):
    """Log the reason behind a non-zero servodtool response.

    The stdout of |response| is searched for the known servodtool error
    messages. When the usb hub is reported as not compatible the
    |smart_usbhub| flag is cleared as well.

    @param response: result object of the failed command; only its
                     stdout is inspected.
    """
    if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
                 response.stdout):
        logging.error('The servo is not plugged on a usb hub that supports'
                      ' power-cycle!')
        # change the flag so we can update this label in later process.
        self.smart_usbhub = False
    elif re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
                   self.servo_serial, response.stdout):
        logging.error('No servo with serial %s found!', self.servo_serial)
    else:
        # None of the known messages matched.
        logging.error('Unexpected error occurred from usbhub control, please'
                      ' file a bug and inform chrome-fleet-software@ team!')
697
Otabek Kasimov545739c2020-08-20 00:24:21 -0700698 def _is_usbc_pigtail_connection_timeout(self):
699 """Check if servo has issue with USBC pigtail connection timeout.
700
701 The usb_console has to be clean for good servo. If console generate
702 messages like (below) then issue is present:
703 [475635.427072 PD TMOUT RX 1/1]
704 RXERR1 Preamble
705 [475635.476044 PD TMOUT RX 1/1]
706 RXERR1 Preamble
707 """
708 if not self.servo_serial:
709 return False
710 logging.debug('Starting check if USBC pigtail connection timeout.')
711 try:
712 cmd = 'usb_console -d 18d1:501b -s %s' % self.servo_serial
713 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
714 result_lines = resp.stdout.splitlines()
715 for line in result_lines:
716 if re.match(self.USBC_PIGTAIL_TIMEOUT_RE, line):
717 return True
718 except Exception as e:
719 logging.debug('(Non-critical) %s.', e)
720 return False
721
722 def _reset_usbc_pigtail_connection(self):
723 """Reset USBC pigtail connection on servo board.
724
725 To reset need to run 'cc off' and then 'cc srcdts' in usb_console.
726 """
727 if not self.servo_serial:
728 return False
729 logging.debug('Starting reset USBC pigtail connection.')
730 def _run_command(cc_command):
731 """Run configuration chanel commands.
732
733 @returns: True if pas successful and False if fail.
734 """
735 try:
736 cmd = (r"echo 'cc %s' | usb_console -d 18d1:501b -s %s"
737 % (cc_command, self.servo_serial))
738 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
739 return True
740 except Exception as e:
741 logging.info('(Non-critical) %s.', e)
742 return False
743
744 logging.info('Turn off configuration channel. And wait 5 seconds.')
745 if _run_command('off'):
746 # wait till command will be effected
747 time.sleep(5)
748 logging.info('Turn on configuration channel. '
749 'And wait 15 seconds.')
750 if _run_command('srcdts'):
751 # wait till command will be effected
752 time.sleep(15)
753
def reset_usbc_pigtail_connection_on_need(self):
    """Reset the USBC pigtail connection if the timeout issue is present.

    Only applies to labstations. After a reset the check is repeated and
    its outcome is reported in the 'success' field of the
    'chromeos/autotest/repair/servo_usbc/reset' counter.
    """
    if not self.is_labstation():
        logging.info('USBC pigtail reset applicable only for labstations')
        return

    if not self._is_usbc_pigtail_connection_timeout():
        # Console is clean, nothing to reset.
        return

    logging.info('USBC pigtail issue detected on servo.')
    self._reset_usbc_pigtail_connection()
    metric_fields = self._get_host_metrics_data()
    # The reset counts as successful when the issue is gone afterwards.
    metric_fields['success'] = (
            not self._is_usbc_pigtail_connection_timeout())
    metrics.Counter(
            'chromeos/autotest/repair/servo_usbc/reset'
    ).increment(fields=metric_fields)
Garry Wangffbd2162020-04-17 16:13:48 -0700768
769 def _get_servo_usb_devnum(self):
770 """Helper function to collect current usb devnum of servo.
771 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700772 # TODO remove try-except when fix crbug.com/1087964
773 try:
774 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
775 resp = self.run(cmd, ignore_status=True, timeout=30)
776 except Exception as e:
777 # Here we catch only timeout errors.
778 # Other errors is filtered by ignore_status=True
779 logging.debug('Attempt to get servo usb-path failed due to '
780 'timeout; %s', e)
781 return ''
782
Garry Wangffbd2162020-04-17 16:13:48 -0700783 if resp.exit_status != 0:
784 self._process_servodtool_error(resp)
785 return ''
786 usb_path = resp.stdout.strip()
787 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
788
789 resp = self.run('cat %s/devnum' % usb_path,
790 ignore_status=True)
791 if resp.exit_status != 0:
792 self._process_servodtool_error(resp)
793 return ''
794 return resp.stdout.strip()
795
def reboot_servo_v3_on_need(self):
    """Reboot servo_v3 when one of the conditions below is met.

    1. If there is an update pending on reboot.
    2. Servo_v3 has been up for more than 96 hours.

    The start and the outcome of the reboot are written both to the
    log and to the job record. A failing reboot is not propagated.
    """
    if self.get_board() != 'beaglebone_servo':
        logging.info('Servo reboot is only applicable for servo V3.')
        return

    pending_update = (self._check_update_status() ==
                      self.UPDATE_STATE.PENDING_REBOOT)
    hours_up = float(self.check_uptime())/3600
    logging.info('Uptime of servo_v3: %s hour(s)', hours_up)
    up_too_long = hours_up > 96

    # Skip reboot if neither condition are met.
    if not pending_update and not up_too_long:
        return

    # A pending update takes precedence over the uptime condition.
    if pending_update:
        message = 'Starting reboot servo_v3 because an update is pending.'
        do_reboot = self._post_update_reboot
    else:
        message = 'Starting reboot servo_v3 because uptime > 96 hours.'
        do_reboot = self._servo_host_reboot
    self.record('INFO', None, None, message)
    logging.info(message)

    try:
        do_reboot()
    except Exception as e:
        logging.debug("Fail to reboot servo_v3; %s", e)
        message = ('Servo_v3 reboot failed, please check debug log '
                   'for details.')
    else:
        message = 'Servo_v3 reboot completed successfully.'
    logging.info(message)
    self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700832
833 def _reset_servo(self):
834 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700835 # TODO remove try-except when fix crbug.com/1087964
836 try:
837 resp = self.run('servodtool device -s %s power-cycle' %
838 self.servo_serial, ignore_status=True,
839 timeout=30)
840 if resp.exit_status != 0:
841 self._process_servodtool_error(resp)
842 return False
843 except Exception as e:
844 # Here we catch only timeout errors.
845 # Other errors is filtered by ignore_status=True
846 logging.debug('Attempt to reset servo failed due to timeout;'
847 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700848 return False
849
850 logging.debug('Wait %s seconds for servo to come back from reset.',
851 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
852 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700853 # change the flag so we can update this label in later process.
854 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700855 return True
856
def reset_servo(self):
    """Reset(power-cycle) the servo via smart usbhub.

    Only applies to labstations. The usb devnum of the servo is read
    before and after the reset; the comparison only decides which
    message is logged and recorded.
    """
    if not self.is_labstation():
        logging.info('Servo reset is not applicable to servo_v3.')
        return

    devnum_before = self._get_servo_usb_devnum()
    logging.info('Servo usb devnum before reset: %s', devnum_before)
    if not self._reset_servo():
        message = ('Failed to reset servo with serial: %s. (Please ignore'
                   ' this error if the DUT is not connected to a smart'
                   ' usbhub).' % self.servo_serial)
        logging.warning(message)
        self.record('INFO', None, None, message)
        return

    devnum_after = self._get_servo_usb_devnum()
    logging.info('Servo usb devnum after reset: %s', devnum_after)
    if devnum_before and devnum_after:
        if devnum_before != devnum_after:
            message = ('Reset servo with serial %s completed successfully!'
                       % self.servo_serial)
        else:
            message = 'Servo reset completed but devnum is still not changed!'
    else:
        # At least one of the two reads returned nothing to compare.
        message = ('Servo reset completed but unable to verify'
                   ' devnum change!')
    logging.info(message)
    self.record('INFO', None, None, message)
887
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800888 def _extract_compressed_logs(self, logdir, relevant_files):
889 """Decompress servod logs in |logdir|.
890
891 @param logdir: directory containing compressed servod logs.
892 @param relevant_files: list of files in |logdir| to consider.
893
894 @returns: tuple, (tarfiles, files) where
895 tarfiles: list of the compressed filenames that have been
896 extracted and deleted
897 files: list of the uncompressed files that were generated
898 """
899 # For all tar-files, first extract them to the directory, and
900 # then let the common flow handle them.
901 tarfiles = [cf for cf in relevant_files if
902 cf.endswith(self.COMPRESSION_SUFFIX)]
903 files = []
904 for f in tarfiles:
905 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
906 with tarfile.open(f) as tf:
907 # Each tarfile has only one member, as
908 # that's the compressed log.
909 member = tf.members[0]
910 # Manipulate so that it only extracts the basename, and not
911 # the directories etc.
912 member.name = norm_name
913 files.append(os.path.join(logdir, member.name))
914 tf.extract(member, logdir)
915 # File has been extracted: remove the compressed file.
916 os.remove(f)
917 return tarfiles, files
918
919 def _extract_mcu_logs(self, log_subdir):
920 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
921
922 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
923 lines from the logs to generate invidiual console logs e.g. after
924 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
925 those MCUs had any console input/output.
926
927 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
928 """
929 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
930 # files
931 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
932 if not os.path.exists(mcu_lines_file):
933 logging.info('No DEBUG logs found to extract MCU logs from.')
934 return
935 mcu_files = {}
936 mcu_file_template = '%s.txt'
937 with open(mcu_lines_file, 'r') as f:
938 for line in f:
939 match = self.MCU_EXTRACTOR.match(line)
940 if match:
941 mcu = match.group(self.MCU_GROUP).lower()
942 line = match.group(self.LINE_GROUP)
943 if mcu not in mcu_files:
944 mcu_file = os.path.join(log_subdir,
945 mcu_file_template % mcu)
946 mcu_files[mcu] = open(mcu_file, 'a')
947 fd = mcu_files[mcu]
948 fd.write(line + '\n')
949 for f in mcu_files:
950 mcu_files[f].close()
951
def remove_latest_log_symlinks(self):
    """Remove the convenience 'latest' symlinks of the servod logs.

    Everything matching 'latest*' in |remote_log_dir| is removed on the
    servo host; a failing removal is ignored.
    """
    self.run('rm %s/latest*' % self.remote_log_dir,
             stderr_tee=None, ignore_status=True)
957
def probe_servod_restart(self, instance_ts, outdir):
    """Grab servod logs from previous instances if part of this session.

    If servod crashed and restarted since the last time this host called
    start_servod(), the logs of those earlier instances are collected as
    well and stored with the |OLD_LOG_SUFFIX| to investigate if
    necessary.

    After a restart a panicinfo command is also issued to the servo
    devices to try and collect reboot information for debugging.

    @param instance_ts: the log timestamp that the current instance uses
    @param outdir: directory to create a subdirectory into to place the
                   servod logs into.
    """
    first_ts = self._initial_instance_ts
    if first_ts is None:
        logging.info('No log timestamp grabbed successfully on servod '
                     'startup. Cannot check device restarts. Ignoring.')
        return
    if instance_ts == first_ts:
        logging.debug('Servod appears to have run without restarting')
        return
    # Servod seems to have restarted (at least once). |first_ts| is the
    # first timestamp, and instance_ts is the current timestamp. Find
    # all timestamps in between them, and grab the logs for each.
    old_instances = self._find_instance_timestamps_between(first_ts,
                                                           instance_ts)
    logging.info('Servod has restarted %d times between the start and the '
                 'end of this servo_host.', len(old_instances))
    logging.info('This might be an issue. Will extract all logs from each '
                 'instance.')
    logging.info('Logs that are not the currently running (about to turn '
                 'down) instance are maked with a .%s in their folder.',
                 self.OLD_LOG_SUFFIX)
    for old_ts in old_instances:
        self.get_instance_logs(old_ts, outdir, old=True)
    # Lastly, servod has restarted due to a potential issue. Try to get
    # panic information from servo micro and servo v4 for the current logs.
    # This can only happen if the |_servo| attribute is initialized.
    if not self._servo:
        return
    for mcu in ('servo_micro', 'servo_v4'):
        ctrl = '%s_uart_cmd' % mcu
        if not self._servo.has_control(ctrl):
            continue
        logging.info('Trying to retrieve %r panicinfo into logs', mcu)
        try:
            self._servo.set_nocheck(ctrl, 'panicinfo')
        except error.TestFail as e:
            logging.error('Failed to generate panicinfo for %r '
                          'logs. %s', mcu, str(e))
1007
1008 def _find_instance_timestamps_between(self, start_ts, end_ts):
1009 """Find all log timestamps between [start_ts, end_ts).
1010
1011 @param start_ts: str, earliest log timestamp of interest
1012 @param end_ts: str, latest log timestamp of interest
1013
1014 @returns: list, all timestamps between start_ts and end_ts, end_ts
1015 exclusive, on the servo_host. An empty list on errors
1016 """
1017 # Simply get all timestamp, and then sort and remove
1018 cmd = 'ls %s' % self.remote_log_dir
1019 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1020 if res.exit_status != 0:
1021 # Here we failed to find anything.
1022 logging.info('Failed to find remote servod logs. Ignoring.')
1023 return []
1024 logfiles = res.stdout.strip().split()
1025 timestamps = set()
1026 for logfile in logfiles:
1027 ts_match = self.TS_EXTRACTOR.match(logfile)
1028 if not ts_match:
1029 # Simply ignore files that fail the check. It might be the
1030 # 'latest' symlinks or random files.
1031 continue
1032 timestamps.add(ts_match.group(self.TS_GROUP))
1033 # At this point we have all unique timestamps.
1034 timestamps = sorted(timestamps)
1035 for ts in [start_ts, end_ts]:
1036 if ts not in timestamps:
1037 logging.error('Timestamp %r not in servod logs. Cannot query '
1038 'for timestamps in between %r and %r', ts,
1039 start_ts, end_ts)
1040 return []
1041 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
1042
def get_instance_logs_ts(self):
    """Retrieve the currently running servod instance's log timestamp

    The timestamp is extracted from the name of the file that the
    'latest.DEBUG' (or, if absent, 'latest') symlink in |remote_log_dir|
    resolves to.

    @returns: str, timestamp for current instance, or None on failure
    """
    # This cmd gives the real filename of the latest aka current log
    # file, or exits with NO_SYMLINKS_CODE if neither symlink exists.
    cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
           'then realpath %(dir)s/latest.DEBUG;'
           'elif [ -f %(dir)s/latest ];'
           'then realpath %(dir)s/latest;'
           'else exit %(code)d;'
           'fi' % {'dir': self.remote_log_dir,
                   'code': self.NO_SYMLINKS_CODE})
    res = self.run(cmd, stderr_tee=None, ignore_status=True)
    status = res.exit_status
    if status != 0:
        if status == self.NO_SYMLINKS_CODE:
            logging.warning('servod log latest symlinks not found. '
                            'This is likely due to an error starting up '
                            'servod. Ignoring..')
        else:
            logging.warning('Failed to find servod logs on servo host.')
            logging.warning(res.stderr.strip())
        return None

    # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
    fname = os.path.basename(res.stdout.strip())
    ts_match = self.TS_EXTRACTOR.match(fname)
    if ts_match:
        return ts_match.group(self.TS_GROUP)
    logging.warning('Failed to extract timestamp from servod log file '
                    '%r. Skipping. The servo host is using outdated '
                    'servod logging and needs to be updated.', fname)
    return None
1076
def get_instance_logs(self, instance_ts, outdir, old=False):
    """Collect all logs with |instance_ts| and dump into a dir in |outdir|

    This method first collects all logs on the servo_host side pertaining
    to this servod instance (port, instantiation). It glues them together
    into combined log.[level].txt files and extracts all available MCU
    console I/O from the logs into individual files e.g. servo_v4.txt

    All the output can be found in a directory inside |outdir| that
    this generates based on |LOG_DIR|, the servod port, and the instance
    timestamp on the servo_host side. The directory is removed again if
    no logs could be retrieved.

    @param instance_ts: log timestamp to grab logfiles for
    @param outdir: directory to create a subdirectory into to place the
                   servod logs into.
    @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
    """
    # Create the local results log dir.
    log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
                                                 str(self.servo_port),
                                                 instance_ts))
    if old:
        log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
    logging.info('Saving servod logs to %r.', log_dir)
    os.mkdir(log_dir)
    # Now, get all files with that timestamp.
    cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
                                                   instance_ts)
    res = self.run(cmd, stderr_tee=None, ignore_status=True)
    files = res.stdout.strip().split()
    try:
        self.get_file(files, log_dir, try_rsync=False)
        if not os.listdir(log_dir):
            logging.info('No servod logs retrieved. Ignoring, and removing '
                         '%r again.', log_dir)
            os.rmdir(log_dir)
            return
    except error.AutoservRunError as e:
        result = e.result_obj
        if result.exit_status != 0:
            stderr = result.stderr.strip()
            logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
                            stderr or '\n%s' % result)
        # Remove the log_dir as nothing was added to it.
        os.rmdir(log_dir)
        return
    local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
    # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
    for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
        # Create the joint files for each loglevel. i.e log.DEBUG
        joint_file = self.JOINT_LOG_PREFIX
        if level_name:
            joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
        # This helps with some online tools to avoid complaints about an
        # unknown filetype.
        joint_file = joint_file + '.txt'
        joint_path = os.path.join(log_dir, joint_file)
        # Only the file name may select the level: the directory part of
        # the path is caller controlled and could contain a level name.
        files = [f for f in local_files
                 if level_name in os.path.basename(f)]
        if not files:
            # TODO(crrev.com/c/1793030): remove no-level case once CL
            # is pushed
            continue
        # Extract compressed logs if any.
        compressed, extracted = self._extract_compressed_logs(log_dir,
                                                              files)
        files = list(set(files) - set(compressed))
        files.extend(extracted)

        # Need to sort. As they all share the same timestamp, and
        # loglevel, the index itself is sufficient. The highest index
        # is the oldest file, therefore we need a descending sort.
        def sortkey(f, level=level_name):
            """Custom sortkey to sort based on rotation number int."""
            # The current (not yet rotated) log ends with the level.
            if f.endswith(level):
                return 0
            return int(f.split('.')[-1])

        if files:
            files.sort(reverse=True, key=sortkey)
            # Just rename the first file rather than building from
            # scratch.
            os.rename(files[0], joint_path)
            with open(joint_path, 'a') as joint_f:
                for logfile in files[1:]:
                    # Transfer the file to the joint file line by line.
                    with open(logfile, 'r') as log_f:
                        for line in log_f:
                            joint_f.write(line)
                    # File has been written over. Delete safely.
                    os.remove(logfile)
        # Need to remove all files from |local_files| so we don't
        # analyze them again.
        local_files = list(set(local_files) - set(files) - set(compressed))
    # Lastly, extract MCU logs from the joint logs.
    self._extract_mcu_logs(log_dir)
1168
Garry Wang79e9af62019-06-12 15:19:19 -07001169 def _lock(self):
1170 """lock servohost by touching a file.
1171 """
1172 logging.debug('Locking servohost %s by touching %s file',
1173 self.hostname, self._lock_file)
1174 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001175 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001176
Garry Wang79e9af62019-06-12 15:19:19 -07001177 def _unlock(self):
1178 """Unlock servohost by removing the lock file.
1179 """
1180 logging.debug('Unlocking servohost by removing %s file',
1181 self._lock_file)
1182 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001183 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001184
def close(self):
    """Close the associated servo and the host object.

    In order: collects the logs of earlier servod instances of this
    session, closes the servo, collects the logs of the current
    instance, removes the servohost lock if it was taken, stops servod
    and closes the host. Calling it again on a closed host is a no-op.
    """
    # NOTE: throughout this method there are multiple attempts to catch
    # all errors. This is WAI as log grabbing should not fail tests.
    # However, the goal is to catch and handle/process all errors, thus
    # we print the traceback and ask for a bug.
    if self._closed:
        logging.debug('ServoHost is already closed.')
        return
    instance_ts = self.get_instance_logs_ts()
    # TODO(crbug.com/1011516): once enabled, remove the check against
    # localhost and instead check against log-rotation enablement.
    logs_available = (instance_ts is not None and
                      self.job and
                      not self.is_localhost())
    if logs_available:
        # Probe whether there was a servod restart, and grab those old
        # logs as well.
        try:
            self.probe_servod_restart(instance_ts, self.job.resultdir)
        except (error.AutoservRunError, error.TestFail) as e:
            logging.info('Failed to grab servo logs due to: %s. '
                         'This error is forgiven.', str(e))
        except Exception as e:
            logging.error('Unexpected error probing for old logs. %s. '
                          'Forgiven. Please file a bug and fix or catch '
                          'in log probing function', str(e),
                          exc_info=True)

    if self._servo:
        # In some cases when we run as lab-tools, the job object is None.
        servo_outdir = self.job.resultdir if self.job else None
        self._servo.close(servo_outdir)

    if logs_available:
        # Grab current (not old like above) logs after the servo instance
        # was closed out.
        try:
            self.get_instance_logs(instance_ts, self.job.resultdir)
        except error.AutoservRunError as e:
            logging.info('Failed to grab servo logs due to: %s. '
                         'This error is forgiven.', str(e))
        except Exception as e:
            logging.error('Unexpected error grabbing servod logs. %s. '
                          'Forgiven. Please file a bug and fix or catch '
                          'in log grabbing function', str(e), exc_info=True)

    if self._is_locked:
        # Remove the lock if the servohost has been locked.
        try:
            self._unlock()
        except error.AutoservSSHTimeout:
            logging.error('Unlock servohost failed due to ssh timeout.'
                          ' It may caused by servohost went down during'
                          ' the task.')

    # Always stop servod after the task, to minimize the impact a bad
    # servod process has on other servods.(see crbug.com/1028665)
    try:
        self.stop_servod()
    except error.AutoservRunError as e:
        logging.info("Failed to stop servod due to:\n%s\n"
                     "This error is forgiven.", str(e))

    super(ServoHost, self).close()
    # Mark closed.
    self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001250
def get_servo_state(self):
    """Return the servo state stored on this host.

    @returns: the current value of |_servo_state|.
    """
    return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001253
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001254 def _get_host_metrics_data(self):
1255 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001256 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001257 'board': self.servo_board or ''}
1258
1259 def _is_servo_device_connected(self, servo_type, serial):
1260 """Check if device is connected to the labstation.
1261
1262 Works for all servo devices connected to the labstation.
1263 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1264
1265 @param servo_type: The type of servo device. Expecting value can be
1266 servo_v4 or servo_micro.
1267 @param serial: The serial number of the device to detect it.
1268 """
1269 vid_pid = self.SERVO_VID_PID.get(servo_type)
1270 if not vid_pid or not serial:
1271 # device cannot detected without VID/PID or serial number
1272 return False
1273 logging.debug('Started to detect %s', servo_type)
1274 try:
1275 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1276 result = self.run(cmd, ignore_status=True, timeout=30)
1277 if result.exit_status == 0 and result.stdout.strip():
1278 logging.debug('The %s is plugged in to the host.', servo_type)
1279 return True
1280 logging.debug('%s device is not detected; %s', servo_type, result)
1281 return False
1282 except Exception as e:
1283 # can be triggered by timeout issue due running the script
1284 metrics.Counter(
1285 'chromeos/autotest/repair/servo_detection/timeout'
1286 ).increment(fields=self._get_host_metrics_data())
1287 logging.error('%s device is not detected; %s', servo_type, str(e))
1288 return None
1289
def _is_servo_board_present_on_servo_v3(self):
    """Check if the servo board is detected on servo_v3.

    Runs `lsusb` on the host and greps its output for any of the VID/PID
    values listed under 'servo_v3' in `self.SERVO_VID_PID`.

    @returns: True if a matching USB device is listed; False if no
              VID/PID is configured or nothing matched; None if running
              the command raised an exception (presence is unknown).
    """
    vid_pids = self.SERVO_VID_PID['servo_v3']
    if not vid_pids:
        # device cannot be detected without VID/PID
        return False
    logging.debug('Started to detect servo board on servo_v3')
    not_detected = 'The servo board is not detected on servo_v3'
    try:
        # `\|` is grep's alternation operator; a raw string keeps the
        # backslash without relying on an invalid escape sequence.
        cmd = 'lsusb | grep "%s"' % r'\|'.join(vid_pids)
        result = self.run(cmd, ignore_status=True, timeout=30)
        if result.exit_status == 0 and result.stdout.strip():
            logging.debug('The servo board is detected on servo_v3')
            return True
        logging.debug('%s; %s', not_detected, result)
        return False
    except Exception as e:
        # can be triggered by a timeout while running the command
        metrics.Counter(
                'chromeos/autotest/repair/servo_detection/timeout'
        ).increment(fields=self._get_host_metrics_data())
        logging.error('%s; %s', not_detected, str(e))
    # The command could not be run, so the answer is unknown.
    return None
1313
def _is_main_device_not_detected_on_servo_v4(self):
    """Check if servod reported that it cannot find the main device.

    The check greps the servod INFO log named after
    `self._initial_instance_ts` for the "No servo micro or CCD detected"
    error. Logs created more than one hour ago are not trusted.

    @returns: True if the error message is present in a recent log;
              False if there is no log timestamp, the timestamp cannot
              be parsed, the log is too old, or the message is absent.
    """
    log_ts = self._initial_instance_ts
    if not log_ts:
        # latest log not found
        return False
    logging.debug('latest log for servod created at %s', log_ts)

    try:
        parsed_ts = time.strptime(log_ts, "%Y-%m-%d--%H-%M-%S.%f")
        log_created = calendar.timegm(parsed_ts)
    except ValueError:
        logging.debug('Cannot read time from log file name: %s', log_ts)
        return False

    # Both values are seconds since the epoch in UTC.
    now = calendar.timegm(time.gmtime())
    if now - log_created > 3600:
        # the log file is old, we cannot use it
        logging.debug('log file was created more than hour ago, too old')
        return False
    logging.debug('latest log was created not longer then 1 hour ago')

    # check if servod can detect main device by servo_v4
    message = 'ERROR - No servo micro or CCD detected for board'
    cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
           % (self.servo_port, log_ts, message))
    grep_result = self.run(cmd, ignore_status=True)
    if not grep_result.stdout.strip():
        logging.debug('The main device is detected')
        return False
    logging.info('Servod cannot detect main device on the servo; '
                 'Can be caused by bad hardware of servo or '
                 'issue on the DUT side.')
    return True
1351
def get_verify_state(self, tag):
    """Look up the outcome of a single servo verifier.

    @param tag: tag of the verifier to query in the repair strategy.

    @returns: bool or None, as reported by the repair strategy's
              `verifier_is_good()`.
    """
    strategy = self._repair_strategy
    return strategy.verifier_is_good(tag)
1358
def determine_servo_state(self):
    """Determine the servo state from the results of the servo verifiers.

    The verifier results are inspected in a fixed priority order and the
    state matching the first failure found is returned. When starting
    servod or connecting to it failed, extra probes (USB detection of
    the servo, servod log inspection) are used to narrow down the cause.

    @returns: a servo state value from `servo_constants`;
              SERVO_STATE_BROKEN when no more specific state applies.
    """
    tags = ('servo_ssh', 'disk_space', 'servod_job', 'servod_connection',
            'servod_control', 'pwr_button', 'lid_open', 'ec_board',
            'ccd_testlab')
    # 'disk_space' is queried like the others but does not take part in
    # the decision below.
    (ssh_state, _disk_space_state, servod_job_state, connection_state,
     control_state, pwr_button_state, lid_open_state, ec_board_state,
     ccd_testlab_state) = [self.get_verify_state(tag) for tag in tags]

    if not ssh_state:
        return servo_constants.SERVO_STATE_NO_SSH

    failed = self.VERIFY_FAILED
    servod_job_failed = servod_job_state == failed
    connection_failed = connection_state == failed

    if servod_job_failed or connection_failed:
        # Sometimes servod can start without a servo being present.
        # The detection helpers return None when the probe itself could
        # not run; only an explicit False means "not connected".
        if not self.is_labstation():
            if self._is_servo_board_present_on_servo_v3() is False:
                return servo_constants.SERVO_STATE_NOT_CONNECTED
        elif not self.servo_serial:
            return servo_constants.SERVO_STATE_WRONG_CONFIG
        elif self._is_servo_device_connected(
                'servo_v4', self.servo_serial) is False:
            return servo_constants.SERVO_STATE_NOT_CONNECTED

    if servod_job_failed:
        return servo_constants.SERVO_STATE_SERVOD_ISSUE

    if connection_failed:
        if (self.is_labstation()
                and self._is_main_device_not_detected_on_servo_v4()):
            servo_type = None
            if self.get_dut_host_info():
                servo_type = self.get_dut_host_info().get_label_value(
                        servo_constants.SERVO_TYPE_LABEL_PREFIX)
            if servo_type and 'servo_micro' in servo_type:
                serial = self.get_servo_micro_serial_number()
                logging.debug('servo_micro serial: %s', serial)
                # NOTE(review): the labstation probe above is named
                # `_is_servo_device_connected`; confirm that
                # `_is_servo_device_detected` is defined on this class.
                if self._is_servo_device_detected('servo_micro', serial):
                    return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
            # Device can be not detected because of DUT
            # TODO (otabek) update after b/159755652 and b/159754985
            metrics.Counter(
                    'chromeos/autotest/repair/servo_state/needs_replacement'
            ).increment(fields=self._get_host_metrics_data())
        elif not self.is_labstation():
            # Placeholder: logic to check if the flex cable is connected
            # is still needed here.
            pass

    # one of the reasons why servo cannot be initialized
    if ccd_testlab_state == failed:
        return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE

    if connection_failed or control_state == failed:
        return servo_constants.SERVO_STATE_SERVOD_ISSUE

    # Remaining verifiers, in priority order, with the state each failure
    # maps to.
    for verdict, state in (
            (ec_board_state, servo_constants.SERVO_STATE_EC_BROKEN),
            (pwr_button_state,
             servo_constants.SERVO_STATE_BAD_RIBBON_CABLE),
            (lid_open_state, servo_constants.SERVO_STATE_LID_OPEN_FAILED)):
        if verdict == failed:
            return state

    metrics.Counter(
            'chromeos/autotest/repair/unknown_servo_state'
    ).increment(fields=self._get_host_metrics_data())
    logging.info('We do not have special state for this failure yet :)')
    return servo_constants.SERVO_STATE_BROKEN
1437
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001438
def make_servo_hostname(dut_hostname):
    """Derive the servo hostname that corresponds to a DUT hostname.

    '-servo' is appended to the first dot-separated label of the name;
    any domain part after the first dot is kept unchanged.

    @param dut_hostname: hostname of a DUT.

    @return hostname of the DUT's servo.

    """
    short_name, dot, domain = dut_hostname.partition('.')
    return short_name + '-servo' + dot + domain
1450
1451
def servo_host_is_up(servo_hostname):
    """Report whether a servo host answers to ping.

    @param servo_hostname: hostname of the servo host.

    @return True if at least one of three pings was answered, False
            otherwise.
    """
    # This duplicates the SSH ping done early in the servo proxy
    # initialization code, but a failing ping ends in a couple of seconds
    # rather than the 60 seconds it takes to decide that an SSH ping has
    # timed out. That timeout happens when the servo DNS name resolves
    # but there is no host at that IP.
    logging.info('Pinging servo host at %s', servo_hostname)
    config = ping_runner.PingConfig(servo_hostname,
                                    count=3,
                                    ignore_result=True,
                                    ignore_status=True)
    ping_result = ping_runner.PingRunner().ping(config)
    return ping_result.received > 0
1469
1470
def _map_afe_board_to_servo_board(afe_board):
    """Translate an AFE board name into the board name servo expects.

    Many boards are identical to other boards for servo's purposes.
    A board listed in the rename table is replaced outright; otherwise
    the first known suffix the name ends with is stripped.

    @param afe_board string board name received from AFE.
    @return board we expect servo to have.

    """
    renamed_boards = {'gizmo': 'panther'}
    ignored_suffixes = ('-freon', '_freon', '_moblab', '-cheets')

    if afe_board in renamed_boards:
        servo_board = renamed_boards[afe_board]
    else:
        # Only the first matching suffix is removed.
        matched = next((suffix for suffix in ignored_suffixes
                        if afe_board.endswith(suffix)), None)
        if matched is None:
            servo_board = afe_board
        else:
            servo_board = afe_board[:-len(matched)]

    if servo_board != afe_board:
        logging.info('Mapping AFE board=%s to %s', afe_board, servo_board)
    return servo_board
1494
1495
def get_servo_args_for_host(dut_host):
    """Collect the servo data stored in a DUT's host info.

    @param dut_host   Instance of `Host` on which to find the servo
                      attributes.
    @return `servo_args` dict with host and an optional port, or None
            when no servo host attribute is available (including the
            case where the stored port is not an integer).
    """
    host_info = dut_host.host_info_store.get()

    servo_args = {}
    for key, value in six.iteritems(host_info.attributes):
        if key in servo_constants.SERVO_ATTR_KEYS:
            servo_args[key] = value

    host_key = servo_constants.SERVO_HOST_ATTR
    port_key = servo_constants.SERVO_PORT_ATTR
    if port_key in servo_args:
        raw_port = servo_args[port_key]
        try:
            servo_args[port_key] = int(raw_port)
        except ValueError:
            logging.error('servo port is not an int: %s', raw_port)
            # Dropping the host makes this function return None below,
            # because we don't want to use an invalid port.
            servo_args.pop(host_key, None)

    if host_info.board:
        servo_args[servo_constants.SERVO_BOARD_ATTR] = (
                _map_afe_board_to_servo_board(host_info.board))
    if host_info.model:
        servo_args[servo_constants.SERVO_MODEL_ATTR] = host_info.model

    if host_key not in servo_args:
        return None
    return servo_args
Richard Barnetteea3e4602016-06-10 12:36:41 -07001523
1524
def _tweak_args_for_ssp_moblab(servo_args):
    """Replace a loopback servo host with the configured container IP.

    When the servo host in `servo_args` is 'localhost' or '127.0.0.1',
    it is overwritten in place with the 'host_container_ip' value of the
    'SSP' config section (None when that value is not configured).

    @param servo_args  Dictionary of servo arguments; modified in place.
    """
    host_key = servo_constants.SERVO_HOST_ATTR
    if servo_args[host_key] not in ('localhost', '127.0.0.1'):
        return
    servo_args[host_key] = _CONFIG.get_config_value(
            'SSP', 'host_container_ip', type=str, default=None)
1530
1531
def create_servo_host(dut, servo_args, try_lab_servo=False,
                      try_servo_repair=False, dut_host_info=None):
    """Create a ServoHost object for a given DUT, if appropriate.

    This function attempts to create and verify or repair a `ServoHost`
    object for a servo connected to the given `dut`, subject to the
    constraints imposed by the parameters:
      * When the `servo_args` parameter is not `None`, a servo host is
        treated as a hard dependency: it is checked with
        `repair(silent=True)` and repair exceptions are passed back to
        the caller.
      * Otherwise, if `try_servo_repair` is true, the servo host is
        checked with `repair()`; exceptions are logged and discarded.
      * Otherwise, the servo host is created only if it responds to
        `ping`, and is checked with `verify()`; exceptions are logged
        and discarded.

    Servo attributes found on the `dut` (when `try_lab_servo` is true or
    `servo_args` was supplied) take precedence over the `servo_args`
    passed in by the caller.

    TODO(jrbarnette):  The special handling for servo in test control
    files is a thorn in my flesh; I dearly hope to see it cut out before
    my retirement.

    @param dut                An instance of `Host` from which to take
                              servo parameters (if available).
    @param servo_args         A dictionary with servo parameters to use
                              if they can't be found from `dut`.  If this
                              argument is supplied, unrepaired exceptions
                              from `repair()` will be passed back to the
                              caller.
    @param try_lab_servo      If not true, servo host creation will be
                              skipped unless otherwise required by the
                              caller.
    @param try_servo_repair   If true, check a servo host with
                              `repair()` instead of `verify()`.
    @param dut_host_info:     A HostInfo object of the DUT that connected
                              to this servo.

    @returns: A tuple `(servo_host, servo_state)`.  `servo_host` is a
              ServoHost object, or None when no host was created; in that
              case `servo_state` is the servo state explaining why, or
              None when no servo was requested at all.

    """
    servo_dependency = servo_args is not None
    servo_wanted = try_lab_servo or servo_dependency

    if dut is not None and servo_wanted:
        host_servo_args = get_servo_args_for_host(dut)
        if host_servo_args is not None:
            if utils.in_moblab_ssp():
                _tweak_args_for_ssp_moblab(host_servo_args)
            logging.debug(
                    'Overriding provided servo_args (%s) with arguments'
                    ' determined from the host (%s)',
                    servo_args,
                    host_servo_args,
            )
            servo_args = host_servo_args

    if servo_args is None:
        logging.debug('No servo_args provided, and failed to find overrides.')
        # A regular test case which does not require the servo gets no
        # state at all.
        missing_state = (servo_constants.SERVO_STATE_MISSING_CONFIG
                         if servo_wanted else None)
        return None, missing_state

    servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
    servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
    if not _is_servo_host_information_exist(servo_hostname, servo_port):
        logging.debug(
                'Servo connection info missed hostname: %s , port: %s',
                servo_hostname, servo_port)
        return None, servo_constants.SERVO_STATE_MISSING_CONFIG
    if not is_servo_host_information_valid(servo_hostname, servo_port):
        logging.debug(
                'Servo connection info is incorrect hostname: %s , port: %s',
                servo_hostname, servo_port)
        return None, servo_constants.SERVO_STATE_WRONG_CONFIG

    # The ping pre-check only applies to the plain verify() path.
    ping_required = not servo_dependency and not try_servo_repair
    if ping_required and not servo_host_is_up(servo_hostname):
        logging.debug('ServoHost is not up.')
        return None, servo_constants.SERVO_STATE_NO_SSH

    servo_host = ServoHost(**servo_args)

    # Reset or reboot servo device only during AdminRepair tasks.
    if try_servo_repair:
        if servo_host._is_locked:
            # Reset servo if the servo is locked, as we check if the
            # servohost is up, if the servohost is labstation and if the
            # servohost is in lab inside the locking logic.
            servo_host.reset_usbc_pigtail_connection_on_need()
            servo_host.reset_servo()
        else:
            try:
                servo_host.reboot_servo_v3_on_need()
            except Exception as e:
                logging.info('[Non-critical] Unexpected error while trying to'
                             ' reboot servo_v3, skipping the reboot; %s', e)

    if dut:
        servo_host.set_dut_hostname(dut.hostname)
    if dut_host_info:
        servo_host.set_dut_host_info(dut_host_info)

    if try_lab_servo or try_servo_repair:
        # A firmware update failure is logged but never blocks creation.
        try:
            logging.info("Check and update servo firmware.")
            servo_updater.update_servo_firmware(servo_host,
                                                force_update=False)
        except Exception as e:
            logging.error("Servo device update error: %s", e)

    try:
        servo_host.restart_servod(quick_startup=True)
    except error.AutoservSSHTimeout:
        logging.warning("Restart servod failed due ssh connection "
                        "to servohost timed out. This error is forgiven"
                        " here, we will retry in servo repair process.")
    except error.AutoservRunError as e:
        logging.warning("Restart servod failed due to:\n%s\n"
                        "This error is forgiven here, we will retry"
                        " in servo repair process.", str(e))

    # Note that the logic of repair() includes everything done
    # by verify().  It's sufficient to call one or the other;
    # we don't need both.
    if servo_dependency:
        # Not wrapped in try/except: failures propagate to the caller.
        servo_host.repair(silent=True)
    elif try_servo_repair:
        try:
            servo_host.repair()
        except Exception:
            logging.exception('servo repair failed for %s',
                              servo_host.hostname)
    else:
        try:
            servo_host.verify()
        except Exception:
            logging.exception('servo verify failed for %s',
                              servo_host.hostname)
    return servo_host, servo_host.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001688
1689
def _is_servo_host_information_exist(hostname, port):
    """Check that both a servo hostname and a servo port were supplied.

    @param hostname  Hostname of the servohost; None or a blank string
                     counts as missing.
    @param port      Servo port number; an int, or any value that
                     `int()` can convert.

    @returns: True if the hostname is non-blank and the port can be
              interpreted as an integer, False otherwise.
    """
    if hostname is None or not hostname.strip():
        return False
    if port is None:
        return False
    if isinstance(port, int):
        return True
    try:
        int(port)
    except (TypeError, ValueError):
        # ValueError: text that is not a number; TypeError: a type that
        # int() cannot convert at all (e.g. a list).
        return False
    return True
1702
1703
def is_servo_host_information_valid(hostname, port):
    """Check if provided servo attributes are valid.

    The attributes are valid when both are present, the port is in the
    range 1..65000, and the hostname is at least 5 characters long and
    consists only of letters, digits, '-', '_' and '.'.

    @param hostname Hostname of the servohost.
    @param port     servo port number.

    @returns: A bool value to indicate if provided servo attribute valid.
    """
    if not _is_servo_host_information_exist(hostname, port):
        return False
    # checking the range of the port
    port_int = int(port)
    if port_int < 1 or port_int > 65000:
        return False
    # The host is expected to contain only letters, digits, '-', '_' and
    # '.'.  The pattern is a raw string so that `\.` reaches the regex
    # engine without being an invalid string escape sequence.
    if not re.match(r'[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
        return False
    return True