blob: 21b08adb89b3fbccdfe523749b2bcc03407ac97f [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Andrew McRaef0679932020-08-13 09:15:23 +100016import threading
Garry Wangc1288cf2019-12-17 14:58:00 -080017import time
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070019import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070020
21from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070022from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070024from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070025from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070027from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000028from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070029from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070030from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070031from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070032from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070033from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080034
Otabek Kasimov15963492020-06-23 21:10:51 -070035try:
36 from chromite.lib import metrics
37except ImportError:
38 metrics = utils.metrics_mock
39
Dan Shi3b2adf62015-09-02 17:46:54 -070040_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070041
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070042
Garry Wangebc015b2019-06-06 17:45:06 -070043class ServoHost(base_servohost.BaseServoHost):
44 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070045 that with a servo instance for a specific port.
46
47 @type _servo: servo.Servo | None
48 """
Fang Deng5d518f42013-08-02 14:04:32 -070049
Raul E Rangel52ca2e82018-07-03 14:10:14 -060050 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070051
Dan Shie5b3c512014-08-21 12:12:09 -070052 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070053 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070054
Otabek Kasimov545739c2020-08-20 00:24:21 -070055 # Default timeout for run terminal command.
56 DEFAULT_TERMINAL_TIMEOUT = 30
57
xixuan6cf6d2f2016-01-29 15:29:00 -080058 # Ready test function
59 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070060
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080061 # Directory prefix on the servo host where the servod logs are stored.
62 SERVOD_LOG_PREFIX = '/var/log/servod'
63
64 # Exit code to use when symlinks for servod logs are not found.
65 NO_SYMLINKS_CODE = 9
66
67 # Directory in the job's results directory to dump the logs into.
68 LOG_DIR = 'servod'
69
70 # Prefix for joint loglevel files in the logs.
71 JOINT_LOG_PREFIX = 'log'
72
73 # Regex group to extract timestamp from logfile name.
74 TS_GROUP = 'ts'
75
76 # This regex is used to extract the timestamp from servod logs.
77 # files always start with log.
78 TS_RE = (r'log.'
79 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
80 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
81 # The loglevel is optional depending on labstation version.
82 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
83 TS_EXTRACTOR = re.compile(TS_RE)
84
85 # Regex group to extract MCU name from logline in servod logs.
86 MCU_GROUP = 'mcu'
87
88 # Regex group to extract logline from MCU logline in servod logs.
89 LINE_GROUP = 'line'
90
91 # This regex is used to extract the mcu and the line content from an
92 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
93 # Here is an example log-line:
94 #
95 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
96 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
97 #
98 # Here is conceptually how they are formatted:
99 #
100 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
101 # <output>
102 #
103 # The log format starts with a timestamp
104 MCU_RE = (r'[\d\-]+ [\d:,]+ '
105 # The mcu that is logging this is next.
106 r'- (?P<%s>\w+) - '
107 # Next, we have more log outputs before the actual line.
108 # Information about the file line, logging function etc.
109 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
110 # NOTE: if the log format changes, this regex needs to be
111 # adjusted.
112 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
113 # Lastly, we get the MCU's console line.
114 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
115 MCU_EXTRACTOR = re.compile(MCU_RE)
116
Otabek Kasimov545739c2020-08-20 00:24:21 -0700117 # Regex to detect timeout messages when USBC pigtail has timeout issue.
118 # e.g.: [475635.427072 PD TMOUT RX 1/1]
119 USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'
120
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800121 # Suffix to identify compressed logfiles.
122 COMPRESSION_SUFFIX = '.tbz2'
123
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700124 # A suffix to mark servod log directories that came from instance that
125 # ran during this servo_host, but are not the last one running e.g. when
126 # an instance (on purpose, or due to a bug) restarted in the middle of the
127 # run.
128 OLD_LOG_SUFFIX = 'old'
129
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700130 # Mapping servo board with their vid-pid
131 SERVO_VID_PID = {
132 'servo_v4':'18d1:501b',
133 'ccd_cr50':'18d1:5014',
134 'servo_micro':'18d1:501a',
135 'servo_v3':['18d1:5004', '0403:6014'],
136 }
137
Otabek Kasimov15963492020-06-23 21:10:51 -0700138 # States of verifiers
139 # True - verifier run and passed
140 # False - verifier run and failed
141 # None - verifier did not run or dependency failed
142 VERIFY_SUCCESS = True
143 VERIFY_FAILED = False
144 VERIFY_NOT_RUN = None
145
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800146 def _init_attributes(self):
147 self._servo_state = None
148 self.servo_port = None
149 self.servo_board = None
150 self.servo_model = None
151 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700152 # The flag that indicate if a servo is connected to a smart usbhub.
153 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
154 # get replaced.
155 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800156 self._servo = None
Andrew McRaef0679932020-08-13 09:15:23 +1000157 self._tunnel_proxy = None
158 self._tunnel_proxy_lock = threading.Lock()
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700159 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800160 # Flag to make sure that multiple calls to close do not result in the
161 # logic executing multiple times.
162 self._closed = False
Andrew McRaef0679932020-08-13 09:15:23 +1000163 # Per-thread local data
164 self._local = threading.local()
Fang Deng5d518f42013-08-02 14:04:32 -0700165
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700166 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700167 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700168 servo_model=None, servo_serial=None, is_in_lab=None,
169 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700170 """Initialize a ServoHost instance.
171
172 A ServoHost instance represents a host that controls a servo.
173
174 @param servo_host: Name of the host where the servod process
175 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600176 @param servo_port: Port the servod process is listening on. Defaults
177 to the SERVOD_PORT environment variable if set,
178 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700179 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700180 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800181 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
182 to None, for which utils.host_is_in_lab_zone will be
183 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700184
185 """
186 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700187 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800188 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700189 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700190 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700191 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700192 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800193
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800194 # The location of the log files on the servo host for this instance.
195 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
196 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700197 # Path of the servo host lock file.
198 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
199 + self.LOCK_FILE_POSTFIX)
200 # File path to declare a reboot request.
201 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
202 + self.REBOOT_FILE_POSTFIX)
203
204 # Lock the servo host if it's an in-lab labstation to prevent other
205 # task to reboot it until current task completes. We also wait and
206 # make sure the labstation is up here, in the case of the labstation is
207 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700208 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700209 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
210 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700211 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700212
Richard Barnette9a26ad62016-06-10 12:03:08 -0700213 self._repair_strategy = (
214 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700215
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700216 def __str__(self):
217 return "<%s '%s:%s'>" % (
218 type(self).__name__, self.hostname, self.servo_port)
219
Richard Barnette9a26ad62016-06-10 12:03:08 -0700220 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700221 """ Initialize and setup servo for later use.
222 """
223 self.initilize_servo()
224 self.initialize_dut_for_servo()
225
226
227 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700228 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700229
230 Initializes `self._servo` and then verifies that all network
231 connections are working. This will create an ssh tunnel if
232 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700233 """
234 self._servo = servo.Servo(servo_host=self,
235 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700236
Garry Wang8c8dc972020-06-09 13:41:51 -0700237
238 def initialize_dut_for_servo(self):
239 """This method will do some setup for dut control, e.g. setup
240 main servo_v4 device, and also testing the connection between servo
241 and DUT. As a side effect of testing the connection, all signals on
242 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700243 set to the neutral (off) position.
244 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700245 if not self._servo:
246 raise hosts.AutoservVerifyError('Servo object needs to be'
247 ' initialized before initialize'
248 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700249 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700250 self._servo.initialize_dut,
251 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700252 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700253 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
254 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700255
256
257 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700258 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700259
260 If we've previously successfully connected to our servo,
261 disconnect any established ssh tunnel, and set `self._servo`
262 back to `None`.
263 """
264 if self._servo:
265 # N.B. This call is safe even without a tunnel:
266 # rpc_server_tracker.disconnect() silently ignores
267 # unknown ports.
268 self.rpc_server_tracker.disconnect(self.servo_port)
269 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700270
Garry Wangc1288cf2019-12-17 14:58:00 -0800271
Andrew McRaef0679932020-08-13 09:15:23 +1000272 def _maybe_create_servod_ssh_tunnel_proxy(self):
273 """Create a xmlrpc proxy for use with a ssh tunnel.
274 A lock is used to safely create a singleton proxy.
275 """
276 with self._tunnel_proxy_lock:
277 if self._tunnel_proxy is None:
278 self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
279 None,
280 self.servo_port,
281 ready_test_name=self.SERVO_READY_METHOD,
282 timeout_seconds=60,
283 request_timeout_seconds=3600,
284 server_desc=str(self))
285
286
287 def get_servod_server_proxy(self):
288 """Return a proxy if it exists; otherwise, create a new one.
289 A proxy can either be a ssh tunnel based proxy, or a httplib
290 based proxy.
Fang Deng5d518f42013-08-02 14:04:32 -0700291
292 @returns: An xmlrpclib.ServerProxy that is connected to the servod
293 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700294 """
Garry Wang11b5e872020-03-11 15:14:08 -0700295 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
296 and not self.is_localhost()):
Andrew McRaef0679932020-08-13 09:15:23 +1000297 # Check for existing ssh tunnel proxy.
298 if self._tunnel_proxy is None:
299 self._maybe_create_servod_ssh_tunnel_proxy()
300 return self._tunnel_proxy
Richard Barnette9a26ad62016-06-10 12:03:08 -0700301 else:
Andrew McRaef0679932020-08-13 09:15:23 +1000302 # xmlrpc/httplib is not thread-safe, so each thread must have its
303 # own separate proxy connection.
304 if not hasattr(self._local, "_per_thread_proxy"):
305 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
306 self._local._per_thread_proxy = xmlrpclib.ServerProxy(remote)
307 return self._local._per_thread_proxy
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800308
309
Richard Barnette1edbb162016-11-01 11:47:50 -0700310 def verify(self, silent=False):
311 """Update the servo host and verify it's in a good state.
312
313 @param silent If true, suppress logging in `status.log`.
314 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700315 message = 'Beginning verify for servo host %s port %s serial %s'
316 message %= (self.hostname, self.servo_port, self.servo_serial)
317 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700318 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700319 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700320 self._servo_state = servo_constants.SERVO_STATE_WORKING
321 self.record('INFO', None, None,
322 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700323 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700324 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700325 self._servo_state = self.determine_servo_state()
326 self.record('INFO', None, None,
327 'ServoHost verify set servo_state as %s'
328 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700329 if self._is_critical_error(e):
330 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700331
332
Garry Wang7b0e1b72020-03-25 19:08:59 -0700333 def get_image_name_from_usbkey(self, usbkey_dev):
334 """Mount usb drive and check ChromeOS image name on it if there is
335 one. This method assumes the image_usbkey_direction is already set
336 to servo side.
337
338 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
339
340 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
341 or empty string if no test image detected, or unexpected
342 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700343 """
Garry Wang70e5d062020-04-03 18:01:05 -0700344 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700345 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
346 unmount_cmd = 'umount %s' % usb_mount_path
347 # ChromeOS root fs is in /dev/sdx3
348 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
349 # Unmount if there is an existing stale mount.
350 self.run(unmount_cmd, ignore_status=True)
351 # Create if the mount point is not existing.
352 self.run('mkdir -p %s' % usb_mount_path)
353 try:
354 # Attempt to mount the usb drive.
355 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700356 if mount_result.exit_status != 0:
357 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700358 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700359
360 release_content = self.run(
361 'cat %s/etc/lsb-release' % usb_mount_path,
362 ignore_status=True).stdout.strip()
363
364 if not re.search(r'RELEASE_TRACK=.*test', release_content):
365 logging.info('The image on usbkey is not a test image')
366 return ''
367
368 return lsbrelease_utils.get_chromeos_release_builder_path(
369 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700370 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700371 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700372 self.run(unmount_cmd, ignore_status=True)
373
374
Garry Wang70e5d062020-04-03 18:01:05 -0700375 def _probe_and_validate_usb_dev(self):
376 """This method probe the usb dev path by talking to servo, and then
377 validate the dev path is valid block device to servohost.
378 Possible output:
379 1. Encounter error during probe usb dev, returns empty string.
380 2. probe usb dev completed without error but cannot find usb dev,
381 raise AutoservRepairError.
382 3. probe usb dev find a usb dev path, but failed validation in this
383 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700384
Garry Wang70e5d062020-04-03 18:01:05 -0700385 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
386 if unexpected error occurred during probe.
387 @raises: AutoservRepairError if servo couldn't probe the usb dev path
388 (servo.probe_host_usb_dev() returns empty string), or the dev path is
389 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700390 """
391 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700392 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700393 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700394 except Exception as e:
395 # We don't want any unexpected or transient servo communicating
396 # failure block usb repair, so capture all errors here.
397 logging.error(e, exc_info=True)
398 logging.error('Unexpected error occurred on get usbkey dev path,'
399 ' skipping usbkey validation.')
400 return ''
401
Garry Wang70e5d062020-04-03 18:01:05 -0700402 if usb_dev:
403 # probe_host_usb_dev() sometimes return stale record,
404 # so we need to make sure the path exists in fdisk.
405 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700406 try:
407 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
408 if resp.exit_status == 0:
409 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700410
Garry Wang11441182020-06-16 18:34:14 -0700411 logging.error('%s is reported from "image_usbkey_dev" control'
412 ' but not detected by fdisk!', usb_dev)
413 except error.AutoservRunError as e:
414 if 'Timeout encountered' in str(e):
415 logging.warning('Timeout encountered during fdisk run,'
416 ' skipping usbkey validation.')
417 return ''
418 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700419
420 raise hosts.AutoservRepairError(
421 'No usbkey detected on servo, the usbkey may be either missing'
422 ' or broken. Please replace usbkey on the servo and retry.',
423 'missing usbkey')
424
425
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700426 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700427 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700428 if self.servo_board:
429 try:
430 frm_config = config.Config(self.servo_board, self.servo_model)
431 return frm_config.chrome_ec
432 except Exception as e:
433 logging.error('Unexpected error when read from firmware'
434 ' configs; %s', str(e))
435 return False
436
Garry Wang70e5d062020-04-03 18:01:05 -0700437 def validate_image_usbkey(self):
438 """This method first validate if there is a recover usbkey on servo
439 that accessible to servohost, and second check if a ChromeOS image is
440 already on the usb drive and return the image_name so we can avoid
441 unnecessary download and flash to the recover usbkey on servo.
442
443 Please note that, there is special error handling logic here:
444 1. If unexpected error happens, we return empty string. So repair
445 actions will not get blocked.
446 2. If no working usbkey present on servo, but no errors, we'll raise
447 AutoservRepairError here.
448
449 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
450 or empty string if no test image detected, or unexpected
451 error occurred.
452 @raises: AutoservRepairError if the usbkey is not detected on servo.
453 """
454 usb_dev = self._probe_and_validate_usb_dev()
455 if usb_dev:
456 return self.get_image_name_from_usbkey(usb_dev)
457 else:
458 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700459
460
Richard Barnette1edbb162016-11-01 11:47:50 -0700461 def repair(self, silent=False):
462 """Attempt to repair servo host.
463
464 @param silent If true, suppress logging in `status.log`.
465 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700466 message = 'Beginning repair for servo host %s port %s serial %s'
467 message %= (self.hostname, self.servo_port, self.servo_serial)
468 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700469 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700470 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700471 self._servo_state = servo_constants.SERVO_STATE_WORKING
472 self.record('INFO', None, None,
473 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700474 # If target is a labstation then try to withdraw any existing
475 # reboot request created by this servo because it passed repair.
476 if self.is_labstation():
477 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700478 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700479 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700480 self._servo_state = self.determine_servo_state()
481 self.record('INFO', None, None,
482 'ServoHost repair set servo_state as %s'
483 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700484 if self._is_critical_error(e):
485 self.disconnect_servo()
486 self.stop_servod()
487 raise
488
489
490 def _is_critical_error(self, error):
491 if (isinstance(error, hosts.AutoservVerifyDependencyError)
492 and not error.is_critical()):
493 logging.warning('Non-critical verify failure(s) detected during'
494 ' verify/repair servo, servo connection will'
495 ' still up but may not fully functional.'
496 ' Some repair actions and servo depended'
497 ' tests may not run.')
498 return False
499 logging.info('Critical verify failure(s) detected during repair/verify'
500 ' servo. Disconnecting servo and stop servod, all repair '
501 'action and tests that depends on servo will not run.')
502 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700503
504
Dan Shi4d478522014-02-14 13:46:32 -0800505 def get_servo(self):
506 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700507
Dan Shi4d478522014-02-14 13:46:32 -0800508 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700509 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700510 """
Dan Shi4d478522014-02-14 13:46:32 -0800511 return self._servo
512
513
Garry Wang79e9af62019-06-12 15:19:19 -0700514 def request_reboot(self):
515 """Request servohost to be rebooted when it's safe to by touch a file.
516 """
517 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700518 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700519 self.run('touch %s' % self._reboot_file, ignore_status=True)
520
521
Garry Wang464ff1e2019-07-18 17:20:34 -0700522 def withdraw_reboot_request(self):
523 """Withdraw a servohost reboot request if exists by remove the flag
524 file.
525 """
526 logging.debug('Withdrawing request to reboot servohost %s that created'
527 ' by servo with port # %s if exists.',
528 self.hostname, self.servo_port)
529 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
530
531
Garry Wangc1288cf2019-12-17 14:58:00 -0800532 def start_servod(self, quick_startup=False):
533 """Start the servod process on servohost.
534 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800535 # Skip if running on the localhost.(crbug.com/1038168)
536 if self.is_localhost():
537 logging.debug("Servohost is a localhost, skipping start servod.")
538 return
539
540 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800541 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800542 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800543 if self.servo_model:
544 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800545 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800546 logging.warning('Board for DUT is unknown; starting servod'
547 ' assuming a pre-configured board.')
548
549 cmd += ' PORT=%d' % self.servo_port
550 if self.servo_serial:
551 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800552
553 # Start servod with dual_v4 if the DUT/servo from designated pools.
554 dut_host_info = self.get_dut_host_info()
555 if dut_host_info:
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700556 # DUAL_V4: servo setup includes servo_micro and ccd_cr50
557 # connection to the DUT
558 is_dual_setup = False
Garry Wang11b5e872020-03-11 15:14:08 -0700559 if bool(dut_host_info.pools &
560 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800561 logging.debug('The DUT is detected in following designated'
562 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700563 servo_constants.POOLS_SUPPORT_DUAL_V4)
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700564 is_dual_setup = True
565 elif dut_host_info.attributes.get('servo_setup') == 'DUAL_V4':
566 logging.debug('The DUT servo setup specified in config as '
567 ' "DUAL_V4"')
568 is_dual_setup = True
569 if is_dual_setup:
Garry Wangd7367482020-02-27 13:52:40 -0800570 cmd += ' DUAL_V4=1'
571
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800572 # Remove the symbolic links from the logs. This helps ensure that
573 # a failed servod instantiation does not cause us to grab old logs
574 # by mistake.
575 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800576 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800577
578 # There's a lag between when `start servod` completes and when
579 # the _ServodConnectionVerifier trigger can actually succeed.
580 # The call to time.sleep() below gives time to make sure that
581 # the trigger won't fail after we return.
582
583 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
584 # But in the rare case all servo on a labstation are in heavy use they
585 # may take ~30 seconds. So the timeout value will double these value,
586 # and we'll try quick start up when first time initialize servohost,
587 # and use standard start up timeout in repair.
588 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700589 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800590 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700591 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800592 logging.debug('Wait %s seconds for servod process fully up.', timeout)
593 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700594 # Cache the initial instance timestamp to check against servod restarts
595 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800596
597
598 def stop_servod(self):
599 """Stop the servod process on servohost.
600 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800601 # Skip if running on the localhost.(crbug.com/1038168)
602 if self.is_localhost():
603 logging.debug("Servohost is a localhost, skipping stop servod.")
604 return
605
Garry Wangc1288cf2019-12-17 14:58:00 -0800606 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800607 self.run('stop servod PORT=%d' % self.servo_port,
608 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800609 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700610 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
611 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800612
613
614 def restart_servod(self, quick_startup=False):
615 """Restart the servod process on servohost.
616 """
617 self.stop_servod()
618 self.start_servod(quick_startup)
619
Garry Wang11b5e872020-03-11 15:14:08 -0700620
Garry Wangffbd2162020-04-17 16:13:48 -0700621 def _process_servodtool_error(self, response):
622 """Helper function to handle non-zero servodtool response.
623 """
624 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700625 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700626 logging.error('The servo is not plugged on a usb hub that supports'
627 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700628 # change the flag so we can update this label in later process.
629 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700630 return
631
632 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
633 self.servo_serial, response.stdout):
634 logging.error('No servo with serial %s found!', self.servo_serial)
635 return
636
637 logging.error('Unexpected error occurred from usbhub control, please'
638 ' file a bug and inform chrome-fleet-software@ team!')
639
Otabek Kasimov545739c2020-08-20 00:24:21 -0700640 def _is_usbc_pigtail_connection_timeout(self):
641 """Check if servo has issue with USBC pigtail connection timeout.
642
643 The usb_console has to be clean for good servo. If console generate
644 messages like (below) then issue is present:
645 [475635.427072 PD TMOUT RX 1/1]
646 RXERR1 Preamble
647 [475635.476044 PD TMOUT RX 1/1]
648 RXERR1 Preamble
649 """
650 if not self.servo_serial:
651 return False
652 logging.debug('Starting check if USBC pigtail connection timeout.')
653 try:
654 cmd = 'usb_console -d 18d1:501b -s %s' % self.servo_serial
655 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
656 result_lines = resp.stdout.splitlines()
657 for line in result_lines:
658 if re.match(self.USBC_PIGTAIL_TIMEOUT_RE, line):
659 return True
660 except Exception as e:
661 logging.debug('(Non-critical) %s.', e)
662 return False
663
664 def _reset_usbc_pigtail_connection(self):
665 """Reset USBC pigtail connection on servo board.
666
667 To reset need to run 'cc off' and then 'cc srcdts' in usb_console.
668 """
669 if not self.servo_serial:
670 return False
671 logging.debug('Starting reset USBC pigtail connection.')
672 def _run_command(cc_command):
673 """Run configuration chanel commands.
674
675 @returns: True if pas successful and False if fail.
676 """
677 try:
678 cmd = (r"echo 'cc %s' | usb_console -d 18d1:501b -s %s"
679 % (cc_command, self.servo_serial))
680 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
681 return True
682 except Exception as e:
683 logging.info('(Non-critical) %s.', e)
684 return False
685
686 logging.info('Turn off configuration channel. And wait 5 seconds.')
687 if _run_command('off'):
688 # wait till command will be effected
689 time.sleep(5)
690 logging.info('Turn on configuration channel. '
691 'And wait 15 seconds.')
692 if _run_command('srcdts'):
693 # wait till command will be effected
694 time.sleep(15)
695
696 def reset_usbc_pigtail_connection_on_need(self):
697 """Reset USBC pitgtail issue if it present."""
698 if not self.is_labstation():
699 logging.info('USBC pigtail reset applicable only for labstations')
700 return
701
702 if self._is_usbc_pigtail_connection_timeout():
703 logging.info('USBC pigtail issue detected on servo.')
704 self._reset_usbc_pigtail_connection()
705 fields = self._get_host_metrics_data()
706 fields['success'] = not self._is_usbc_pigtail_connection_timeout()
707 metrics.Counter(
708 'chromeos/autotest/repair/servo_usbc/reset'
709 ).increment(fields=fields)
Garry Wangffbd2162020-04-17 16:13:48 -0700710
711 def _get_servo_usb_devnum(self):
712 """Helper function to collect current usb devnum of servo.
713 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700714 # TODO remove try-except when fix crbug.com/1087964
715 try:
716 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
717 resp = self.run(cmd, ignore_status=True, timeout=30)
718 except Exception as e:
719 # Here we catch only timeout errors.
720 # Other errors is filtered by ignore_status=True
721 logging.debug('Attempt to get servo usb-path failed due to '
722 'timeout; %s', e)
723 return ''
724
Garry Wangffbd2162020-04-17 16:13:48 -0700725 if resp.exit_status != 0:
726 self._process_servodtool_error(resp)
727 return ''
728 usb_path = resp.stdout.strip()
729 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
730
731 resp = self.run('cat %s/devnum' % usb_path,
732 ignore_status=True)
733 if resp.exit_status != 0:
734 self._process_servodtool_error(resp)
735 return ''
736 return resp.stdout.strip()
737
Garry Wang358aad42020-08-02 20:56:04 -0700738 def reboot_servo_v3_on_need(self):
739 """Check and reboot servo_v3 based on below conditions.
740 1. If there is an update pending on reboot.
741 2. Servo_v3 has been up for more than 96 hours.
742 """
743 if self.get_board() != 'beaglebone_servo':
744 logging.info('Servo reboot is only applicable for servo V3.')
Otabek Kasimove6df8102020-07-21 20:15:25 -0700745 return
746
Garry Wang358aad42020-08-02 20:56:04 -0700747 update_pending_reboot = (self._check_update_status() ==
748 self.UPDATE_STATE.PENDING_REBOOT)
749 uptime_hours = float(self.check_uptime())/3600
750 logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
751 long_up_time = uptime_hours > 96
752
753 # Skip reboot if neither condition are met.
754 if not (update_pending_reboot or long_up_time):
Otabek Kasimove6df8102020-07-21 20:15:25 -0700755 return
756
Garry Wang358aad42020-08-02 20:56:04 -0700757 if update_pending_reboot:
758 message = 'Starting reboot servo_v3 because an update is pending.'
759 reboot_method = self._post_update_reboot
760 elif long_up_time:
761 message = 'Starting reboot servo_v3 because uptime > 96 hours.'
762 reboot_method = self._servo_host_reboot
763 self.record('INFO', None, None, message)
764 logging.info(message)
Otabek Kasimove6df8102020-07-21 20:15:25 -0700765 try:
Garry Wang358aad42020-08-02 20:56:04 -0700766 reboot_method()
Otabek Kasimove6df8102020-07-21 20:15:25 -0700767 message = 'Servo_v3 reboot completed successfully.'
768 except Exception as e:
769 logging.debug("Fail to reboot servo_v3; %s", e)
770 message = ('Servo_v3 reboot failed, please check debug log '
771 'for details.')
772 logging.info(message)
773 self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700774
775 def _reset_servo(self):
776 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700777 # TODO remove try-except when fix crbug.com/1087964
778 try:
779 resp = self.run('servodtool device -s %s power-cycle' %
780 self.servo_serial, ignore_status=True,
781 timeout=30)
782 if resp.exit_status != 0:
783 self._process_servodtool_error(resp)
784 return False
785 except Exception as e:
786 # Here we catch only timeout errors.
787 # Other errors is filtered by ignore_status=True
788 logging.debug('Attempt to reset servo failed due to timeout;'
789 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700790 return False
791
792 logging.debug('Wait %s seconds for servo to come back from reset.',
793 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
794 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700795 # change the flag so we can update this label in later process.
796 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700797 return True
798
799
800 def reset_servo(self):
801 """Reset(power-cycle) the servo via smart usbhub.
802 """
803 if not self.is_labstation():
804 logging.info('Servo reset is not applicable to servo_v3.')
805 return
806
807 pre_reset_devnum = self._get_servo_usb_devnum()
808 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
809 result = self._reset_servo()
810 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700811 message = ('Failed to reset servo with serial: %s. (Please ignore'
812 ' this error if the DUT is not connected to a smart'
813 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700814 logging.warning(message)
815 self.record('INFO', None, None, message)
816 return
817
818 post_reset_devnum = self._get_servo_usb_devnum()
819 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
820 if not (pre_reset_devnum and post_reset_devnum):
821 message = ('Servo reset completed but unable to verify'
822 ' devnum change!')
823 elif pre_reset_devnum != post_reset_devnum:
824 message = ('Reset servo with serial %s completed successfully!'
825 % self.servo_serial)
826 else:
827 message = 'Servo reset completed but devnum is still not changed!'
828 logging.info(message)
829 self.record('INFO', None, None, message)
830
831
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800832 def _extract_compressed_logs(self, logdir, relevant_files):
833 """Decompress servod logs in |logdir|.
834
835 @param logdir: directory containing compressed servod logs.
836 @param relevant_files: list of files in |logdir| to consider.
837
838 @returns: tuple, (tarfiles, files) where
839 tarfiles: list of the compressed filenames that have been
840 extracted and deleted
841 files: list of the uncompressed files that were generated
842 """
843 # For all tar-files, first extract them to the directory, and
844 # then let the common flow handle them.
845 tarfiles = [cf for cf in relevant_files if
846 cf.endswith(self.COMPRESSION_SUFFIX)]
847 files = []
848 for f in tarfiles:
849 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
850 with tarfile.open(f) as tf:
851 # Each tarfile has only one member, as
852 # that's the compressed log.
853 member = tf.members[0]
854 # Manipulate so that it only extracts the basename, and not
855 # the directories etc.
856 member.name = norm_name
857 files.append(os.path.join(logdir, member.name))
858 tf.extract(member, logdir)
859 # File has been extracted: remove the compressed file.
860 os.remove(f)
861 return tarfiles, files
862
863 def _extract_mcu_logs(self, log_subdir):
864 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
865
866 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
867 lines from the logs to generate invidiual console logs e.g. after
868 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
869 those MCUs had any console input/output.
870
871 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
872 """
873 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
874 # files
875 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
876 if not os.path.exists(mcu_lines_file):
877 logging.info('No DEBUG logs found to extract MCU logs from.')
878 return
879 mcu_files = {}
880 mcu_file_template = '%s.txt'
881 with open(mcu_lines_file, 'r') as f:
882 for line in f:
883 match = self.MCU_EXTRACTOR.match(line)
884 if match:
885 mcu = match.group(self.MCU_GROUP).lower()
886 line = match.group(self.LINE_GROUP)
887 if mcu not in mcu_files:
888 mcu_file = os.path.join(log_subdir,
889 mcu_file_template % mcu)
890 mcu_files[mcu] = open(mcu_file, 'a')
891 fd = mcu_files[mcu]
892 fd.write(line + '\n')
893 for f in mcu_files:
894 mcu_files[f].close()
895
896
897 def remove_latest_log_symlinks(self):
898 """Remove the conveninence symlinks 'latest' servod logs."""
899 symlink_wildcard = '%s/latest*' % self.remote_log_dir
900 cmd = 'rm ' + symlink_wildcard
901 self.run(cmd, stderr_tee=None, ignore_status=True)
902
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700903 def probe_servod_restart(self, instance_ts, outdir):
904 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800905
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700906 If since the last time this host called start_servod() servod crashed
907 and restarted, this helper finds those logs as well, and stores them
908 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000909
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700910 It also issues a panicinfo command to servo devices after the restart
911 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800912
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700913 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800914 @param outdir: directory to create a subdirectory into to place the
915 servod logs into.
916 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700917 if self._initial_instance_ts is None:
918 logging.info('No log timestamp grabbed successfully on servod '
919 'startup. Cannot check device restarts. Ignoring.')
920 return
921 if instance_ts == self._initial_instance_ts:
922 logging.debug('Servod appears to have run without restarting')
923 return
924 # Servod seems to have restarted (at least once). |_initial_instance_ts|
925 # is the first timestamp, and instance_ts is the current timestamp. Find
926 # all timestamps in between them, and grab the logs for each.
927 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
928 instance_ts)
929 logging.info('Servod has restarted %d times between the start and the '
930 'end of this servo_host.', len(tss))
931 logging.info('This might be an issue. Will extract all logs from each '
932 'instance.')
933 logging.info('Logs that are not the currently running (about to turn '
934 'down) instance are maked with a .%s in their folder.',
935 self.OLD_LOG_SUFFIX)
936 for ts in tss:
937 self.get_instance_logs(ts, outdir, old=True)
938 # Lastly, servod has restarted due to a potential issue. Try to get
939 # panic information from servo micro and servo v4 for the current logs.
940 # This can only happen if the |_servo| attribute is initialized.
941 if self._servo:
942 for mcu in ['servo_micro', 'servo_v4']:
943 ctrl = '%s_uart_cmd' % mcu
944 if self._servo.has_control(ctrl):
945 logging.info('Trying to retrieve %r panicinfo into logs',
946 mcu)
947 try:
948 self._servo.set_nocheck(ctrl, 'panicinfo')
949 except error.TestFail as e:
950 logging.error('Failed to generate panicinfo for %r '
951 'logs. %s', mcu, str(e))
952
953 def _find_instance_timestamps_between(self, start_ts, end_ts):
954 """Find all log timestamps between [start_ts, end_ts).
955
956 @param start_ts: str, earliest log timestamp of interest
957 @param end_ts: str, latest log timestamp of interest
958
959 @returns: list, all timestamps between start_ts and end_ts, end_ts
960 exclusive, on the servo_host. An empty list on errors
961 """
962 # Simply get all timestamp, and then sort and remove
963 cmd = 'ls %s' % self.remote_log_dir
964 res = self.run(cmd, stderr_tee=None, ignore_status=True)
965 if res.exit_status != 0:
966 # Here we failed to find anything.
967 logging.info('Failed to find remote servod logs. Ignoring.')
968 return []
969 logfiles = res.stdout.strip().split()
970 timestamps = set()
971 for logfile in logfiles:
972 ts_match = self.TS_EXTRACTOR.match(logfile)
973 if not ts_match:
974 # Simply ignore files that fail the check. It might be the
975 # 'latest' symlinks or random files.
976 continue
977 timestamps.add(ts_match.group(self.TS_GROUP))
978 # At this point we have all unique timestamps.
979 timestamps = sorted(timestamps)
980 for ts in [start_ts, end_ts]:
981 if ts not in timestamps:
982 logging.error('Timestamp %r not in servod logs. Cannot query '
983 'for timestamps in between %r and %r', ts,
984 start_ts, end_ts)
985 return []
986 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
987
988 def get_instance_logs_ts(self):
989 """Retrieve the currently running servod instance's log timestamp
990
991 @returns: str, timestamp for current instance, or None on failure
992 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800993 # First, extract the timestamp. This cmd gives the real filename of
994 # the latest aka current log file.
995 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
996 'then realpath %(dir)s/latest.DEBUG;'
997 'elif [ -f %(dir)s/latest ];'
998 'then realpath %(dir)s/latest;'
999 'else exit %(code)d;'
1000 'fi' % {'dir': self.remote_log_dir,
1001 'code': self.NO_SYMLINKS_CODE})
1002 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1003 if res.exit_status != 0:
1004 if res.exit_status == self.NO_SYMLINKS_CODE:
1005 logging.warning('servod log latest symlinks not found. '
1006 'This is likely due to an error starting up '
1007 'servod. Ignoring..')
1008 else:
1009 logging.warning('Failed to find servod logs on servo host.')
1010 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001011 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001012 fname = os.path.basename(res.stdout.strip())
1013 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -08001014 ts_match = self.TS_EXTRACTOR.match(fname)
1015 if not ts_match:
1016 logging.warning('Failed to extract timestamp from servod log file '
1017 '%r. Skipping. The servo host is using outdated '
1018 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001019 return None
1020 return ts_match.group(self.TS_GROUP)
1021
1022 def get_instance_logs(self, instance_ts, outdir, old=False):
1023 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
1024
1025 This method first collects all logs on the servo_host side pertaining
1026 to this servod instance (port, instatiation). It glues them together
1027 into combined log.[level].txt files and extracts all available MCU
1028 console I/O from the logs into individual files e.g. servo_v4.txt
1029
1030 All the output can be found in a directory inside |outdir| that
1031 this generates based on |LOG_DIR|, the servod port, and the instance
1032 timestamp on the servo_host side.
1033
1034 @param instance_ts: log timestamp to grab logfiles for
1035 @param outdir: directory to create a subdirectory into to place the
1036 servod logs into.
1037 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
1038 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001039 # Create the local results log dir.
1040 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
1041 str(self.servo_port),
1042 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001043 if old:
1044 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
1045 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001046 os.mkdir(log_dir)
1047 # Now, get all files with that timestamp.
1048 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
1049 instance_ts)
1050 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1051 files = res.stdout.strip().split()
1052 try:
1053 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001054 if not os.listdir(log_dir):
1055 logging.info('No servod logs retrieved. Ignoring, and removing '
1056 '%r again.', log_dir)
1057 os.rmdir(log_dir)
1058 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001059 except error.AutoservRunError as e:
1060 result = e.result_obj
1061 if result.exit_status != 0:
1062 stderr = result.stderr.strip()
1063 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
1064 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001065 # Remove the log_dir as nothing was added to it.
1066 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001067 return
1068 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
1069 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
1070 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
1071 # Create the joint files for each loglevel. i.e log.DEBUG
1072 joint_file = self.JOINT_LOG_PREFIX
1073 if level_name:
1074 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
1075 # This helps with some online tools to avoid complaints about an
1076 # unknown filetype.
1077 joint_file = joint_file + '.txt'
1078 joint_path = os.path.join(log_dir, joint_file)
1079 files = [f for f in local_files if level_name in f]
1080 if not files:
1081 # TODO(crrev.com/c/1793030): remove no-level case once CL
1082 # is pushed
1083 continue
1084 # Extract compressed logs if any.
1085 compressed, extracted = self._extract_compressed_logs(log_dir,
1086 files)
1087 files = list(set(files) - set(compressed))
1088 files.extend(extracted)
1089 # Need to sort. As they all share the same timestamp, and
1090 # loglevel, the index itself is sufficient. The highest index
1091 # is the oldest file, therefore we need a descending sort.
1092 def sortkey(f, level=level_name):
1093 """Custom sortkey to sort based on rotation number int."""
1094 if f.endswith(level_name): return 0
1095 return int(f.split('.')[-1])
1096
1097 files.sort(reverse=True, key=sortkey)
1098 # Just rename the first file rather than building from scratch.
1099 os.rename(files[0], joint_path)
1100 with open(joint_path, 'a') as joint_f:
1101 for logfile in files[1:]:
1102 # Transfer the file to the joint file line by line.
1103 with open(logfile, 'r') as log_f:
1104 for line in log_f:
1105 joint_f.write(line)
1106 # File has been written over. Delete safely.
1107 os.remove(logfile)
1108 # Need to remove all files form |local_files| so we don't
1109 # analyze them again.
1110 local_files = list(set(local_files) - set(files) - set(compressed))
1111 # Lastly, extract MCU logs from the joint logs.
1112 self._extract_mcu_logs(log_dir)
1113
Garry Wang79e9af62019-06-12 15:19:19 -07001114 def _lock(self):
1115 """lock servohost by touching a file.
1116 """
1117 logging.debug('Locking servohost %s by touching %s file',
1118 self.hostname, self._lock_file)
1119 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001120 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001121
1122
1123 def _unlock(self):
1124 """Unlock servohost by removing the lock file.
1125 """
1126 logging.debug('Unlocking servohost by removing %s file',
1127 self._lock_file)
1128 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001129 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001130
1131
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001132 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001133 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001134 # NOTE: throughout this method there are multiple attempts to catch
1135 # all errors. This is WAI as log grabbing should not fail tests.
1136 # However, the goal is to catch and handle/process all errors, thus
1137 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001138 if self._closed:
1139 logging.debug('ServoHost is already closed.')
1140 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001141 instance_ts = self.get_instance_logs_ts()
1142 # TODO(crbug.com/1011516): once enabled, remove the check against
1143 # localhost and instead check against log-rotiation enablement.
1144 logs_available = (instance_ts is not None and
1145 self.job and
1146 not self.is_localhost())
1147 if logs_available:
1148 # Probe whether there was a servod restart, and grab those old
1149 # logs as well.
1150 try:
1151 self.probe_servod_restart(instance_ts, self.job.resultdir)
1152 except (error.AutoservRunError, error.TestFail) as e:
1153 logging.info('Failed to grab servo logs due to: %s. '
1154 'This error is forgiven.', str(e))
1155 except Exception as e:
1156 logging.error('Unexpected error probing for old logs. %s. '
1157 'Forgiven. Please file a bug and fix or catch '
1158 'in log probing function', str(e),
1159 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001160 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001161 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001162 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001163 self._servo.close(outdir)
1164
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001165 if logs_available:
1166 # Grab current (not old like above) logs after the servo instance
1167 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001168 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001169 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001170 except error.AutoservRunError as e:
1171 logging.info('Failed to grab servo logs due to: %s. '
1172 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001173 except Exception as e:
1174 logging.error('Unexpected error grabbing servod logs. %s. '
1175 'Forgiven. Please file a bug and fix or catch '
1176 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001177
Garry Wang7c00b0f2019-06-25 17:28:17 -07001178 if self._is_locked:
1179 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001180 try:
1181 self._unlock()
1182 except error.AutoservSSHTimeout:
1183 logging.error('Unlock servohost failed due to ssh timeout.'
1184 ' It may caused by servohost went down during'
1185 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001186 # We want always stop servod after task to minimum the impact of bad
1187 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001188 try:
1189 self.stop_servod()
1190 except error.AutoservRunError as e:
1191 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001192 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001193
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001194 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001195 # Mark closed.
1196 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001197
1198
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001199 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001200 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001201
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001202 def _get_host_metrics_data(self):
1203 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001204 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001205 'board': self.servo_board or ''}
1206
1207 def _is_servo_device_connected(self, servo_type, serial):
1208 """Check if device is connected to the labstation.
1209
1210 Works for all servo devices connected to the labstation.
1211 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1212
1213 @param servo_type: The type of servo device. Expecting value can be
1214 servo_v4 or servo_micro.
1215 @param serial: The serial number of the device to detect it.
1216 """
1217 vid_pid = self.SERVO_VID_PID.get(servo_type)
1218 if not vid_pid or not serial:
1219 # device cannot detected without VID/PID or serial number
1220 return False
1221 logging.debug('Started to detect %s', servo_type)
1222 try:
1223 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1224 result = self.run(cmd, ignore_status=True, timeout=30)
1225 if result.exit_status == 0 and result.stdout.strip():
1226 logging.debug('The %s is plugged in to the host.', servo_type)
1227 return True
1228 logging.debug('%s device is not detected; %s', servo_type, result)
1229 return False
1230 except Exception as e:
1231 # can be triggered by timeout issue due running the script
1232 metrics.Counter(
1233 'chromeos/autotest/repair/servo_detection/timeout'
1234 ).increment(fields=self._get_host_metrics_data())
1235 logging.error('%s device is not detected; %s', servo_type, str(e))
1236 return None
1237
1238 def _is_servo_board_present_on_servo_v3(self):
1239 """Check if servo board is detected on servo_v3"""
1240 vid_pids = self.SERVO_VID_PID['servo_v3']
1241 if not vid_pids or len(vid_pids) == 0:
1242 # device cannot detected without VID/PID
1243 return False
1244 logging.debug('Started to detect servo board on servo_v3')
1245 not_detected = 'The servo board is not detected on servo_v3'
1246 try:
1247 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1248 result = self.run(cmd, ignore_status=True, timeout=30)
1249 if result.exit_status == 0 and result.stdout.strip():
1250 logging.debug('The servo board is detected on servo_v3')
1251 return True
1252 logging.debug('%s; %s', not_detected, result)
1253 return False
1254 except Exception as e:
1255 # can be triggered by timeout issue due running the script
1256 metrics.Counter(
1257 'chromeos/autotest/repair/servo_detection/timeout'
1258 ).increment(fields=self._get_host_metrics_data())
1259 logging.error('%s; %s', not_detected, str(e))
1260 return None
1261
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001262 def _is_main_device_not_detected_on_servo_v4(self):
1263 """Check if servod cannot find main device on servo.
1264
1265 The check based on reading servod logs for servo_v4.
1266 """
1267 if not self._initial_instance_ts:
1268 # latest log not found
1269 return False
1270 logging.debug('latest log for servod created at %s',
1271 self._initial_instance_ts)
1272 try:
1273 log_created = calendar.timegm(time.strptime(
1274 self._initial_instance_ts,
1275 "%Y-%m-%d--%H-%M-%S.%f"))
1276 except ValueError as e:
1277 logging.debug('Cannot read time from log file name: %s',
1278 self._initial_instance_ts)
1279 return False
1280 min_time_created = calendar.timegm(time.gmtime())
1281 if min_time_created > log_created + 3600:
1282 # the log file is old we cannot use it
1283 logging.debug('log file was created more than hour ago, too old')
1284 return False
1285 logging.debug('latest log was created not longer then 1 hour ago')
1286
1287 # check if servod can detect main device by servo_v4
1288 message = 'ERROR - No servo micro or CCD detected for board'
1289 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1290 % (self.servo_port, self._initial_instance_ts, message))
1291 result = self.run(cmd, ignore_status=True)
1292 if result.stdout.strip():
1293 logging.info('Servod cannot detect main device on the servo; '
1294 'Can be caused by bad hardware of servo or '
1295 'issue on the DUT side.')
1296 return True
1297 logging.debug('The main device is detected')
1298 return False
1299
Otabek Kasimov15963492020-06-23 21:10:51 -07001300 def get_verify_state(self, tag):
1301 """Return the state of servo verifier.
1302
1303 @returns: bool or None
1304 """
1305 return self._repair_strategy.verifier_is_good(tag)
1306
1307 def determine_servo_state(self):
1308 """Determine servo state based on the failed verifier.
1309
1310 @returns: servo state value
1311 The state detecting based on first fail verifier or collecting of
1312 them.
1313 """
1314 ssh = self.get_verify_state('servo_ssh')
1315 disk_space = self.get_verify_state('disk_space')
1316 start_servod = self.get_verify_state('servod_job')
1317 create_servo = self.get_verify_state('servod_connection')
1318 init_servo = self.get_verify_state('servod_control')
1319 pwr_button = self.get_verify_state('pwr_button')
1320 lid_open = self.get_verify_state('lid_open')
1321 ec_board = self.get_verify_state('ec_board')
1322 ccd_testlab = self.get_verify_state('ccd_testlab')
1323
1324 if not ssh:
1325 return servo_constants.SERVO_STATE_NO_SSH
1326
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001327 if (start_servod == self.VERIFY_FAILED
1328 or create_servo == self.VERIFY_FAILED):
1329 # sometimes servo can start with out present servo
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001330 if self.is_labstation():
1331 if not self.servo_serial:
1332 return servo_constants.SERVO_STATE_WRONG_CONFIG
1333 if self._is_servo_device_connected(
1334 'servo_v4',
1335 self.servo_serial) == False:
1336 return servo_constants.SERVO_STATE_NOT_CONNECTED
1337 elif self._is_servo_board_present_on_servo_v3() == False:
1338 return servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001339
1340 if start_servod == self.VERIFY_FAILED:
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001341 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1342
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001343 if create_servo == self.VERIFY_FAILED:
1344 if (self.is_labstation()
1345 and self._is_main_device_not_detected_on_servo_v4()):
1346 servo_type = None
1347 if self.get_dut_host_info():
1348 servo_type = self.get_dut_host_info().get_label_value(
1349 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1350 if servo_type and 'servo_micro' in servo_type:
1351 serial = self.get_servo_micro_serial_number()
1352 logging.debug('servo_micro serial: %s', serial)
1353 if self._is_servo_device_detected('servo_micro',
1354 serial):
1355 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1356 # Device can be not detected because of DUT
1357 # TODO (otabek) update after b/159755652 and b/159754985
1358 metrics.Counter(
1359 'chromeos/autotest/repair/servo_state/needs_replacement'
1360 ).increment(fields=self._get_host_metrics_data())
1361 elif not self.is_labstation():
1362 # Here need logic to check if flex cable is connected
1363 pass
1364
Otabek Kasimov15963492020-06-23 21:10:51 -07001365 # one of the reason why servo can not initialized
1366 if ccd_testlab == self.VERIFY_FAILED:
1367 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1368
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001369 if (create_servo == self.VERIFY_FAILED
1370 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001371 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1372
1373 if pwr_button == self.VERIFY_FAILED:
1374 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1375 if lid_open == self.VERIFY_FAILED:
1376 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1377 if ec_board == self.VERIFY_FAILED:
1378 return servo_constants.SERVO_STATE_EC_BROKEN
1379
Otabek Kasimov15963492020-06-23 21:10:51 -07001380 metrics.Counter(
1381 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001382 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001383 logging.info('We do not have special state for this failure yet :)')
1384 return servo_constants.SERVO_STATE_BROKEN
1385
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001386
Richard Barnetteea3e4602016-06-10 12:36:41 -07001387def make_servo_hostname(dut_hostname):
1388 """Given a DUT's hostname, return the hostname of its servo.
1389
1390 @param dut_hostname: hostname of a DUT.
1391
1392 @return hostname of the DUT's servo.
1393
1394 """
1395 host_parts = dut_hostname.split('.')
1396 host_parts[0] = host_parts[0] + '-servo'
1397 return '.'.join(host_parts)
1398
1399
1400def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001401 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001402
1403 @param servo_hostname: hostname of the servo host.
1404
1405 @return True if it's up, False otherwise
1406 """
1407 # Technically, this duplicates the SSH ping done early in the servo
1408 # proxy initialization code. However, this ping ends in a couple
1409 # seconds when if fails, rather than the 60 seconds it takes to decide
1410 # that an SSH ping has timed out. Specifically, that timeout happens
1411 # when our servo DNS name resolves, but there is no host at that IP.
1412 logging.info('Pinging servo host at %s', servo_hostname)
1413 ping_config = ping_runner.PingConfig(
1414 servo_hostname, count=3,
1415 ignore_result=True, ignore_status=True)
1416 return ping_runner.PingRunner().ping(ping_config).received > 0
1417
1418
Richard Barnettee519dcd2016-08-15 17:37:17 -07001419def _map_afe_board_to_servo_board(afe_board):
1420 """Map a board we get from the AFE to a servo appropriate value.
1421
1422 Many boards are identical to other boards for servo's purposes.
1423 This function makes that mapping.
1424
1425 @param afe_board string board name received from AFE.
1426 @return board we expect servo to have.
1427
1428 """
1429 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1430 BOARD_MAP = {'gizmo': 'panther'}
1431 mapped_board = afe_board
1432 if afe_board in BOARD_MAP:
1433 mapped_board = BOARD_MAP[afe_board]
1434 else:
1435 for suffix in KNOWN_SUFFIXES:
1436 if afe_board.endswith(suffix):
1437 mapped_board = afe_board[0:-len(suffix)]
1438 break
1439 if mapped_board != afe_board:
1440 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1441 return mapped_board
1442
1443
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001444def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001445 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001446
Richard Barnetteea3e4602016-06-10 12:36:41 -07001447 @param dut_host Instance of `Host` on which to find the servo
1448 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001449 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001450 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001451 info = dut_host.host_info_store.get()
1452 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001453 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001454
Garry Wang11b5e872020-03-11 15:14:08 -07001455 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001456 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001457 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1458 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001459 except ValueError:
1460 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001461 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001462 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001463 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001464
1465 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001466 servo_board = _map_afe_board_to_servo_board(info.board)
1467 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001468 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001469 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1470 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001471
1472
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001473def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001474 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1475 in ['localhost', '127.0.0.1']):
1476 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001477 'SSP', 'host_container_ip', type=str, default=None)
1478
1479
Dan Shi023aae32016-05-25 11:13:01 -07001480def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001481 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001482 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001483
Richard Barnette9a26ad62016-06-10 12:03:08 -07001484 This function attempts to create and verify or repair a `ServoHost`
1485 object for a servo connected to the given `dut`, subject to various
1486 constraints imposed by the parameters:
1487 * When the `servo_args` parameter is not `None`, a servo
1488 host must be created, and must be checked with `repair()`.
1489 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1490 true:
1491 * If `try_servo_repair` is true, then create a servo host and
1492 check it with `repair()`.
1493 * Otherwise, if the servo responds to `ping` then create a
1494 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001495
Richard Barnette9a26ad62016-06-10 12:03:08 -07001496 In cases where `servo_args` was not `None`, repair failure
1497 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001498 are logged and then discarded. Note that this only happens in cases
1499 where we're called from a test (not special task) control file that
1500 has an explicit dependency on servo. In that case, we require that
1501 repair not write to `status.log`, so as to avoid polluting test
1502 results.
1503
1504 TODO(jrbarnette): The special handling for servo in test control
1505 files is a thorn in my flesh; I dearly hope to see it cut out before
1506 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001507
1508 Parameters for a servo host consist of a host name, port number, and
1509 DUT board, and are determined from one of these sources, in order of
1510 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001511 * Servo attributes from the `dut` parameter take precedence over
1512 all other sources of information.
1513 * If a DNS entry for the servo based on the DUT hostname exists in
1514 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001515 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001516 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001517 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001518
1519 @param dut An instance of `Host` from which to take
1520 servo parameters (if available).
1521 @param servo_args A dictionary with servo parameters to use if
1522 they can't be found from `dut`. If this
1523 argument is supplied, unrepaired exceptions
1524 from `verify()` will be passed back to the
1525 caller.
1526 @param try_lab_servo If not true, servo host creation will be
1527 skipped unless otherwise required by the
1528 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001529 @param try_servo_repair If true, check a servo host with
1530 `repair()` instead of `verify()`.
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001531 @param dut_host_info: A HostInfo object of the DUT that connected
1532 to this servo.
Dan Shi4d478522014-02-14 13:46:32 -08001533
1534 @returns: A ServoHost object or None. See comments above.
1535
1536 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001537 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001538 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001539 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001540 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001541 if utils.in_moblab_ssp():
1542 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001543 logging.debug(
1544 'Overriding provided servo_args (%s) with arguments'
1545 ' determined from the host (%s)',
1546 servo_args,
1547 servo_args_override,
1548 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001549 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001550
Richard Barnetteea3e4602016-06-10 12:36:41 -07001551 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001552 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001553 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001554 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001555 else:
1556 # For regular test case which not required the servo
1557 return None, None
1558
Garry Wang11b5e872020-03-11 15:14:08 -07001559 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1560 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001561 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1562 logging.debug(
1563 'Servo connection info missed hostname: %s , port: %s',
1564 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001565 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001566 if not is_servo_host_information_valid(servo_hostname, servo_port):
1567 logging.debug(
1568 'Servo connection info is incorrect hostname: %s , port: %s',
1569 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001570 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001571 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001572 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001573 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001574 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001575
Garry Wangebc015b2019-06-06 17:45:06 -07001576 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001577
Otabek Kasimove6df8102020-07-21 20:15:25 -07001578 # Reset or reboot servo device only during AdminRepair tasks.
1579 if try_servo_repair:
1580 if newhost._is_locked:
1581 # Reset servo if the servo is locked, as we check if the servohost
1582 # is up, if the servohost is labstation and if the servohost is in
1583 # lab inside the locking logic.
Otabek Kasimov545739c2020-08-20 00:24:21 -07001584 newhost.reset_usbc_pigtail_connection_on_need()
Otabek Kasimove6df8102020-07-21 20:15:25 -07001585 newhost.reset_servo()
1586 else:
Garry Wang358aad42020-08-02 20:56:04 -07001587 try:
1588 newhost.reboot_servo_v3_on_need()
Garry Wang1f0d5332020-08-10 19:32:32 -07001589 except Exception as e:
1590 logging.info('[Non-critical] Unexpected error while trying to'
1591 ' reboot servo_v3, skipping the reboot; %s', e)
Otabek Kasimove6df8102020-07-21 20:15:25 -07001592
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001593 if dut:
1594 newhost.set_dut_hostname(dut.hostname)
Otabek Kasimov9e90ae12020-08-14 03:01:19 -07001595 if dut_host_info:
1596 newhost.set_dut_host_info(dut_host_info)
Garry Wangffbd2162020-04-17 16:13:48 -07001597
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001598 if try_lab_servo or try_servo_repair:
1599 try:
1600 logging.info("Check and update servo firmware.")
1601 servo_updater.update_servo_firmware(
1602 newhost,
1603 force_update=False)
1604 except Exception as e:
1605 logging.error("Servo device update error: %s", e)
1606
Garry Wangcdd27b22020-01-13 14:59:11 -08001607 try:
1608 newhost.restart_servod(quick_startup=True)
1609 except error.AutoservSSHTimeout:
1610 logging.warning("Restart servod failed due ssh connection "
1611 "to servohost timed out. This error is forgiven"
1612 " here, we will retry in servo repair process.")
1613 except error.AutoservRunError as e:
1614 logging.warning("Restart servod failed due to:\n%s\n"
1615 "This error is forgiven here, we will retry"
1616 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001617
Richard Barnette9a26ad62016-06-10 12:03:08 -07001618 # Note that the logic of repair() includes everything done
1619 # by verify(). It's sufficient to call one or the other;
1620 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001621 if servo_dependency:
1622 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001623 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001624
1625 if try_servo_repair:
1626 try:
1627 newhost.repair()
1628 except Exception:
1629 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001630 else:
1631 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001632 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001633 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001634 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001635 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001636
1637
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001638def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001639 if hostname is None or len(hostname.strip()) == 0:
1640 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001641 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001642 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001643 if not type(port) is int:
1644 try:
1645 int(port)
1646 except ValueError:
1647 return False
1648
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001649 return True
1650
1651
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001652def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001653 """Check if provided servo attributes are valid.
1654
1655 @param hostname Hostname of the servohost.
1656 @param port servo port number.
1657
1658 @returns: A bool value to indicate if provided servo attribute valid.
1659 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001660 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001661 return False
1662 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001663 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001664 if port_int < 1 or port_int > 65000:
1665 return False
1666 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001667 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001668 return False
1669 return True