blob: efcfccd6d13e0b601b2432941ed95ec04c8ac831 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Andrew McRaef0679932020-08-13 09:15:23 +100016import threading
Garry Wang2b5eef92020-08-21 16:23:35 -070017import json
Garry Wangc1288cf2019-12-17 14:58:00 -080018import time
Dana Goyette4dc0adc2019-05-06 14:51:53 -070019import xmlrpclib
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070020import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070021
22from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070023from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070024from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070025from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070026from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070027from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070028from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000029from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070030from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070031from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070032from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070033from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070034from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080035
Otabek Kasimov15963492020-06-23 21:10:51 -070036try:
37 from chromite.lib import metrics
38except ImportError:
39 metrics = utils.metrics_mock
40
Dan Shi3b2adf62015-09-02 17:46:54 -070041_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070042
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070043
Garry Wangebc015b2019-06-06 17:45:06 -070044class ServoHost(base_servohost.BaseServoHost):
45 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070046 that with a servo instance for a specific port.
47
48 @type _servo: servo.Servo | None
49 """
Fang Deng5d518f42013-08-02 14:04:32 -070050
Raul E Rangel52ca2e82018-07-03 14:10:14 -060051 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070052
Dan Shie5b3c512014-08-21 12:12:09 -070053 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070054 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070055
Otabek Kasimov545739c2020-08-20 00:24:21 -070056 # Default timeout for run terminal command.
57 DEFAULT_TERMINAL_TIMEOUT = 30
58
xixuan6cf6d2f2016-01-29 15:29:00 -080059 # Ready test function
60 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070061
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080062 # Directory prefix on the servo host where the servod logs are stored.
63 SERVOD_LOG_PREFIX = '/var/log/servod'
64
65 # Exit code to use when symlinks for servod logs are not found.
66 NO_SYMLINKS_CODE = 9
67
68 # Directory in the job's results directory to dump the logs into.
69 LOG_DIR = 'servod'
70
71 # Prefix for joint loglevel files in the logs.
72 JOINT_LOG_PREFIX = 'log'
73
74 # Regex group to extract timestamp from logfile name.
75 TS_GROUP = 'ts'
76
77 # This regex is used to extract the timestamp from servod logs.
78 # files always start with log.
79 TS_RE = (r'log.'
80 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
81 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
82 # The loglevel is optional depending on labstation version.
83 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
84 TS_EXTRACTOR = re.compile(TS_RE)
85
86 # Regex group to extract MCU name from logline in servod logs.
87 MCU_GROUP = 'mcu'
88
89 # Regex group to extract logline from MCU logline in servod logs.
90 LINE_GROUP = 'line'
91
92 # This regex is used to extract the mcu and the line content from an
93 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
94 # Here is an example log-line:
95 #
96 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
97 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
98 #
99 # Here is conceptually how they are formatted:
100 #
101 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
102 # <output>
103 #
104 # The log format starts with a timestamp
105 MCU_RE = (r'[\d\-]+ [\d:,]+ '
106 # The mcu that is logging this is next.
107 r'- (?P<%s>\w+) - '
108 # Next, we have more log outputs before the actual line.
109 # Information about the file line, logging function etc.
110 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
111 # NOTE: if the log format changes, this regex needs to be
112 # adjusted.
113 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
114 # Lastly, we get the MCU's console line.
115 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
116 MCU_EXTRACTOR = re.compile(MCU_RE)
117
Otabek Kasimov545739c2020-08-20 00:24:21 -0700118 # Regex to detect timeout messages when USBC pigtail has timeout issue.
119 # e.g.: [475635.427072 PD TMOUT RX 1/1]
120 USBC_PIGTAIL_TIMEOUT_RE = r'\[[\d \.]{1,20}(PD TMOUT RX 1\/1)\]'
121
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800122 # Suffix to identify compressed logfiles.
123 COMPRESSION_SUFFIX = '.tbz2'
124
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700125 # A suffix to mark servod log directories that came from instance that
126 # ran during this servo_host, but are not the last one running e.g. when
127 # an instance (on purpose, or due to a bug) restarted in the middle of the
128 # run.
129 OLD_LOG_SUFFIX = 'old'
130
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700131 # Mapping servo board with their vid-pid
132 SERVO_VID_PID = {
133 'servo_v4':'18d1:501b',
134 'ccd_cr50':'18d1:5014',
135 'servo_micro':'18d1:501a',
136 'servo_v3':['18d1:5004', '0403:6014'],
137 }
138
Otabek Kasimov15963492020-06-23 21:10:51 -0700139 # States of verifiers
140 # True - verifier run and passed
141 # False - verifier run and failed
142 # None - verifier did not run or dependency failed
143 VERIFY_SUCCESS = True
144 VERIFY_FAILED = False
145 VERIFY_NOT_RUN = None
146
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800147 def _init_attributes(self):
148 self._servo_state = None
149 self.servo_port = None
150 self.servo_board = None
151 self.servo_model = None
152 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700153 # The flag that indicate if a servo is connected to a smart usbhub.
154 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
155 # get replaced.
156 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800157 self._servo = None
Andrew McRaef0679932020-08-13 09:15:23 +1000158 self._tunnel_proxy = None
159 self._tunnel_proxy_lock = threading.Lock()
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700160 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800161 # Flag to make sure that multiple calls to close do not result in the
162 # logic executing multiple times.
163 self._closed = False
Andrew McRaef0679932020-08-13 09:15:23 +1000164 # Per-thread local data
165 self._local = threading.local()
Fang Deng5d518f42013-08-02 14:04:32 -0700166
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700167 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700168 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700169 servo_model=None, servo_serial=None, is_in_lab=None,
170 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700171 """Initialize a ServoHost instance.
172
173 A ServoHost instance represents a host that controls a servo.
174
175 @param servo_host: Name of the host where the servod process
176 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600177 @param servo_port: Port the servod process is listening on. Defaults
178 to the SERVOD_PORT environment variable if set,
179 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700180 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700181 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800182 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
183 to None, for which utils.host_is_in_lab_zone will be
184 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700185
186 """
187 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700188 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800189 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700190 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700191 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700192 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700193 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800194
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800195 # The location of the log files on the servo host for this instance.
196 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
197 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700198 # Path of the servo host lock file.
199 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
200 + self.LOCK_FILE_POSTFIX)
201 # File path to declare a reboot request.
202 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
203 + self.REBOOT_FILE_POSTFIX)
204
205 # Lock the servo host if it's an in-lab labstation to prevent other
206 # task to reboot it until current task completes. We also wait and
207 # make sure the labstation is up here, in the case of the labstation is
208 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700209 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700210 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
211 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700212 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700213
Richard Barnette9a26ad62016-06-10 12:03:08 -0700214 self._repair_strategy = (
215 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700216
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700217 def __str__(self):
218 return "<%s '%s:%s'>" % (
219 type(self).__name__, self.hostname, self.servo_port)
220
Richard Barnette9a26ad62016-06-10 12:03:08 -0700221 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700222 """ Initialize and setup servo for later use.
223 """
224 self.initilize_servo()
225 self.initialize_dut_for_servo()
226
227
228 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700229 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700230
231 Initializes `self._servo` and then verifies that all network
232 connections are working. This will create an ssh tunnel if
233 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700234 """
235 self._servo = servo.Servo(servo_host=self,
236 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700237
Garry Wang8c8dc972020-06-09 13:41:51 -0700238
239 def initialize_dut_for_servo(self):
240 """This method will do some setup for dut control, e.g. setup
241 main servo_v4 device, and also testing the connection between servo
242 and DUT. As a side effect of testing the connection, all signals on
243 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700244 set to the neutral (off) position.
245 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700246 if not self._servo:
247 raise hosts.AutoservVerifyError('Servo object needs to be'
248 ' initialized before initialize'
249 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700250 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700251 self._servo.initialize_dut,
252 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700253 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700254 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
255 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700256
257
258 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700259 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700260
261 If we've previously successfully connected to our servo,
262 disconnect any established ssh tunnel, and set `self._servo`
263 back to `None`.
264 """
265 if self._servo:
266 # N.B. This call is safe even without a tunnel:
267 # rpc_server_tracker.disconnect() silently ignores
268 # unknown ports.
269 self.rpc_server_tracker.disconnect(self.servo_port)
270 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700271
Garry Wangc1288cf2019-12-17 14:58:00 -0800272
Andrew McRaef0679932020-08-13 09:15:23 +1000273 def _maybe_create_servod_ssh_tunnel_proxy(self):
274 """Create a xmlrpc proxy for use with a ssh tunnel.
275 A lock is used to safely create a singleton proxy.
276 """
277 with self._tunnel_proxy_lock:
278 if self._tunnel_proxy is None:
279 self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
280 None,
281 self.servo_port,
282 ready_test_name=self.SERVO_READY_METHOD,
283 timeout_seconds=60,
284 request_timeout_seconds=3600,
285 server_desc=str(self))
286
287
288 def get_servod_server_proxy(self):
289 """Return a proxy if it exists; otherwise, create a new one.
290 A proxy can either be a ssh tunnel based proxy, or a httplib
291 based proxy.
Fang Deng5d518f42013-08-02 14:04:32 -0700292
293 @returns: An xmlrpclib.ServerProxy that is connected to the servod
294 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700295 """
Garry Wang11b5e872020-03-11 15:14:08 -0700296 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
297 and not self.is_localhost()):
Andrew McRaef0679932020-08-13 09:15:23 +1000298 # Check for existing ssh tunnel proxy.
299 if self._tunnel_proxy is None:
300 self._maybe_create_servod_ssh_tunnel_proxy()
301 return self._tunnel_proxy
Richard Barnette9a26ad62016-06-10 12:03:08 -0700302 else:
Andrew McRaef0679932020-08-13 09:15:23 +1000303 # xmlrpc/httplib is not thread-safe, so each thread must have its
304 # own separate proxy connection.
305 if not hasattr(self._local, "_per_thread_proxy"):
306 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
307 self._local._per_thread_proxy = xmlrpclib.ServerProxy(remote)
308 return self._local._per_thread_proxy
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800309
310
Richard Barnette1edbb162016-11-01 11:47:50 -0700311 def verify(self, silent=False):
312 """Update the servo host and verify it's in a good state.
313
314 @param silent If true, suppress logging in `status.log`.
315 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700316 message = 'Beginning verify for servo host %s port %s serial %s'
317 message %= (self.hostname, self.servo_port, self.servo_serial)
318 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700319 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700320 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700321 self._servo_state = servo_constants.SERVO_STATE_WORKING
322 self.record('INFO', None, None,
323 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700324 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700325 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700326 self._servo_state = self.determine_servo_state()
327 self.record('INFO', None, None,
328 'ServoHost verify set servo_state as %s'
329 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700330 if self._is_critical_error(e):
331 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700332
Garry Wang2b5eef92020-08-21 16:23:35 -0700333 def _get_default_usbkey_mount_path(self):
334 return '/media/servo_usb/%s' % self.servo_port
Fang Deng5d518f42013-08-02 14:04:32 -0700335
Garry Wang7b0e1b72020-03-25 19:08:59 -0700336 def get_image_name_from_usbkey(self, usbkey_dev):
337 """Mount usb drive and check ChromeOS image name on it if there is
338 one. This method assumes the image_usbkey_direction is already set
339 to servo side.
340
341 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
342
343 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
344 or empty string if no test image detected, or unexpected
345 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700346 """
Garry Wang70e5d062020-04-03 18:01:05 -0700347 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang2b5eef92020-08-21 16:23:35 -0700348 mount_dst = self._get_default_usbkey_mount_path()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700349 # Unmount if there is an existing stale mount.
Garry Wang2b5eef92020-08-21 16:23:35 -0700350 self._unmount_drive(mount_dst)
351 # ChromeOS root fs is in /dev/sdx3
352 mount_src = usbkey_dev + '3'
Garry Wang7b0e1b72020-03-25 19:08:59 -0700353 try:
Garry Wang2b5eef92020-08-21 16:23:35 -0700354 if not self._mount_drive(mount_src, mount_dst):
355 logging.debug('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700356 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700357
358 release_content = self.run(
Garry Wang2b5eef92020-08-21 16:23:35 -0700359 'cat %s/etc/lsb-release' % mount_dst,
Garry Wang70e5d062020-04-03 18:01:05 -0700360 ignore_status=True).stdout.strip()
361
362 if not re.search(r'RELEASE_TRACK=.*test', release_content):
363 logging.info('The image on usbkey is not a test image')
364 return ''
365
366 return lsbrelease_utils.get_chromeos_release_builder_path(
367 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700368 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700369 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang2b5eef92020-08-21 16:23:35 -0700370 self._unmount_drive(mount_dst)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700371
Garry Wang2b5eef92020-08-21 16:23:35 -0700372 def _extract_firmware_image_from_usbkey(self, fw_dst):
373 """Extract firmware images from the usbkey on servo, this method
374 assumes there is already a ChromeOS test image staged on servo.
375
376 @param: fw_dst the path that we'll copy firmware images to.
377
378 @returns: a json format string of firmware manifest data.
379 """
380 usbkey_dev = self._probe_and_validate_usb_dev()
381 if not usbkey_dev:
382 raise hosts.AutoservRepairError('Unexpected error occurred when'
383 ' probe usbkey dev path, please check logs for detail.')
384
385 mount_dst = self._get_default_usbkey_mount_path()
386 # Unmount if there is an existing stale mount.
387 self._unmount_drive(mount_dst)
388 # ChromeOS root fs is in /dev/sdx3
389 mount_src = usbkey_dev + '3'
390 try:
391 if not self._mount_drive(mount_src, mount_dst):
392 raise hosts.AutoservRepairError('Failed to extract firmware'
393 ' image; Unable to mount %s.' % usbkey_dev,
394 'unable to mount usbkey')
395 updater_bin = os.path.join(mount_dst,
396 'usr/sbin/chromeos-firmwareupdate')
397 self.run('%s --unpack %s' % (updater_bin, fw_dst))
398 return self.run('%s --manifest' % updater_bin).stdout
399 finally:
400 self._unmount_drive(mount_dst)
401
402 def prepare_repair_firmware_image(self, fw_dst=None):
403 """Prepare firmware image on the servohost for auto repair process
404 to consume.
405
406 @param: fw_dst the path that we want to store firmware image on
407 the servohost.
408
409 @returns: A tuple that containes ec firmware image path and bios
410 firmware image path on the servohost, or None if type of
411 image is not available based on manifest and dut's model.
412 """
413 model = self.servo_model or self._dut_host_info.model
414 if not model:
415 raise hosts.AutoservRepairError(
416 'Could not determine DUT\'s model.',
417 'model infomation unknown')
418
419 if not fw_dst:
420 fw_dst = '/tmp/firmware_image/%s' % self.servo_port
421 # Cleanup and re-create dst path to have a fresh start.
422 self.run('rm -rf %s' % fw_dst)
423 self.run('mkdir -p %s' % fw_dst)
424
425 manifest = json.loads(self._extract_firmware_image_from_usbkey(fw_dst))
426 model_manifest = manifest.get(model)
427 if not model_manifest:
428 raise hosts.AutoservRepairError('Could not find firmware manifest'
429 ' for model:%s' % model, 'model manifest not found')
430 try:
431 ec_image = os.path.join(fw_dst, model_manifest['ec']['image'])
432 except KeyError:
433 ec_image = None
434 try:
435 bios_image = os.path.join(fw_dst, model_manifest['host']['image'])
436 except KeyError:
437 bios_image = None
438 if not ec_image and not bios_image:
439 raise hosts.AutoservRepairError('Could not find any firmware image'
440 ' for model:%s' % model, 'cannot find firmware image')
441 return ec_image, bios_image
Garry Wang7b0e1b72020-03-25 19:08:59 -0700442
Garry Wang70e5d062020-04-03 18:01:05 -0700443 def _probe_and_validate_usb_dev(self):
444 """This method probe the usb dev path by talking to servo, and then
445 validate the dev path is valid block device to servohost.
446 Possible output:
447 1. Encounter error during probe usb dev, returns empty string.
448 2. probe usb dev completed without error but cannot find usb dev,
449 raise AutoservRepairError.
450 3. probe usb dev find a usb dev path, but failed validation in this
451 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700452
Garry Wang70e5d062020-04-03 18:01:05 -0700453 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
454 if unexpected error occurred during probe.
455 @raises: AutoservRepairError if servo couldn't probe the usb dev path
456 (servo.probe_host_usb_dev() returns empty string), or the dev path is
457 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700458 """
459 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700460 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700461 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700462 except Exception as e:
463 # We don't want any unexpected or transient servo communicating
464 # failure block usb repair, so capture all errors here.
465 logging.error(e, exc_info=True)
466 logging.error('Unexpected error occurred on get usbkey dev path,'
467 ' skipping usbkey validation.')
468 return ''
469
Garry Wang70e5d062020-04-03 18:01:05 -0700470 if usb_dev:
471 # probe_host_usb_dev() sometimes return stale record,
472 # so we need to make sure the path exists in fdisk.
473 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700474 try:
475 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
476 if resp.exit_status == 0:
477 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700478
Garry Wang11441182020-06-16 18:34:14 -0700479 logging.error('%s is reported from "image_usbkey_dev" control'
480 ' but not detected by fdisk!', usb_dev)
481 except error.AutoservRunError as e:
482 if 'Timeout encountered' in str(e):
483 logging.warning('Timeout encountered during fdisk run,'
484 ' skipping usbkey validation.')
485 return ''
486 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700487
488 raise hosts.AutoservRepairError(
489 'No usbkey detected on servo, the usbkey may be either missing'
490 ' or broken. Please replace usbkey on the servo and retry.',
491 'missing usbkey')
492
493
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700494 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700495 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700496 if self.servo_board:
497 try:
498 frm_config = config.Config(self.servo_board, self.servo_model)
499 return frm_config.chrome_ec
500 except Exception as e:
501 logging.error('Unexpected error when read from firmware'
502 ' configs; %s', str(e))
503 return False
504
Garry Wang70e5d062020-04-03 18:01:05 -0700505 def validate_image_usbkey(self):
506 """This method first validate if there is a recover usbkey on servo
507 that accessible to servohost, and second check if a ChromeOS image is
508 already on the usb drive and return the image_name so we can avoid
509 unnecessary download and flash to the recover usbkey on servo.
510
511 Please note that, there is special error handling logic here:
512 1. If unexpected error happens, we return empty string. So repair
513 actions will not get blocked.
514 2. If no working usbkey present on servo, but no errors, we'll raise
515 AutoservRepairError here.
516
517 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
518 or empty string if no test image detected, or unexpected
519 error occurred.
520 @raises: AutoservRepairError if the usbkey is not detected on servo.
521 """
522 usb_dev = self._probe_and_validate_usb_dev()
523 if usb_dev:
524 return self.get_image_name_from_usbkey(usb_dev)
525 else:
526 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700527
Richard Barnette1edbb162016-11-01 11:47:50 -0700528 def repair(self, silent=False):
529 """Attempt to repair servo host.
530
531 @param silent If true, suppress logging in `status.log`.
532 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700533 message = 'Beginning repair for servo host %s port %s serial %s'
534 message %= (self.hostname, self.servo_port, self.servo_serial)
535 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700536 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700537 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700538 self._servo_state = servo_constants.SERVO_STATE_WORKING
539 self.record('INFO', None, None,
540 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700541 # If target is a labstation then try to withdraw any existing
542 # reboot request created by this servo because it passed repair.
543 if self.is_labstation():
544 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700545 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700546 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700547 self._servo_state = self.determine_servo_state()
548 self.record('INFO', None, None,
549 'ServoHost repair set servo_state as %s'
550 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700551 if self._is_critical_error(e):
552 self.disconnect_servo()
553 self.stop_servod()
554 raise
555
556
557 def _is_critical_error(self, error):
558 if (isinstance(error, hosts.AutoservVerifyDependencyError)
559 and not error.is_critical()):
560 logging.warning('Non-critical verify failure(s) detected during'
561 ' verify/repair servo, servo connection will'
562 ' still up but may not fully functional.'
563 ' Some repair actions and servo depended'
564 ' tests may not run.')
565 return False
566 logging.info('Critical verify failure(s) detected during repair/verify'
567 ' servo. Disconnecting servo and stop servod, all repair '
568 'action and tests that depends on servo will not run.')
569 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700570
571
Dan Shi4d478522014-02-14 13:46:32 -0800572 def get_servo(self):
573 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700574
Dan Shi4d478522014-02-14 13:46:32 -0800575 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700576 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700577 """
Dan Shi4d478522014-02-14 13:46:32 -0800578 return self._servo
579
580
Garry Wang79e9af62019-06-12 15:19:19 -0700581 def request_reboot(self):
582 """Request servohost to be rebooted when it's safe to by touch a file.
583 """
584 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700585 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700586 self.run('touch %s' % self._reboot_file, ignore_status=True)
587
588
Garry Wang464ff1e2019-07-18 17:20:34 -0700589 def withdraw_reboot_request(self):
590 """Withdraw a servohost reboot request if exists by remove the flag
591 file.
592 """
593 logging.debug('Withdrawing request to reboot servohost %s that created'
594 ' by servo with port # %s if exists.',
595 self.hostname, self.servo_port)
596 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
597
598
Garry Wangc1288cf2019-12-17 14:58:00 -0800599 def start_servod(self, quick_startup=False):
600 """Start the servod process on servohost.
601 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800602 # Skip if running on the localhost.(crbug.com/1038168)
603 if self.is_localhost():
604 logging.debug("Servohost is a localhost, skipping start servod.")
605 return
606
607 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800608 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800609 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800610 if self.servo_model:
611 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800612 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800613 logging.warning('Board for DUT is unknown; starting servod'
614 ' assuming a pre-configured board.')
615
616 cmd += ' PORT=%d' % self.servo_port
617 if self.servo_serial:
618 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800619
620 # Start servod with dual_v4 if the DUT/servo from designated pools.
621 dut_host_info = self.get_dut_host_info()
622 if dut_host_info:
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700623 # DUAL_V4: servo setup includes servo_micro and ccd_cr50
624 # connection to the DUT
625 is_dual_setup = False
Garry Wang11b5e872020-03-11 15:14:08 -0700626 if bool(dut_host_info.pools &
627 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800628 logging.debug('The DUT is detected in following designated'
629 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700630 servo_constants.POOLS_SUPPORT_DUAL_V4)
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700631 is_dual_setup = True
632 elif dut_host_info.attributes.get('servo_setup') == 'DUAL_V4':
633 logging.debug('The DUT servo setup specified in config as '
634 ' "DUAL_V4"')
635 is_dual_setup = True
636 if is_dual_setup:
Garry Wangd7367482020-02-27 13:52:40 -0800637 cmd += ' DUAL_V4=1'
638
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800639 # Remove the symbolic links from the logs. This helps ensure that
640 # a failed servod instantiation does not cause us to grab old logs
641 # by mistake.
642 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800643 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800644
645 # There's a lag between when `start servod` completes and when
646 # the _ServodConnectionVerifier trigger can actually succeed.
647 # The call to time.sleep() below gives time to make sure that
648 # the trigger won't fail after we return.
649
650 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
651 # But in the rare case all servo on a labstation are in heavy use they
652 # may take ~30 seconds. So the timeout value will double these value,
653 # and we'll try quick start up when first time initialize servohost,
654 # and use standard start up timeout in repair.
655 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700656 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800657 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700658 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800659 logging.debug('Wait %s seconds for servod process fully up.', timeout)
660 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700661 # Cache the initial instance timestamp to check against servod restarts
662 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800663
664
665 def stop_servod(self):
666 """Stop the servod process on servohost.
667 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800668 # Skip if running on the localhost.(crbug.com/1038168)
669 if self.is_localhost():
670 logging.debug("Servohost is a localhost, skipping stop servod.")
671 return
672
Garry Wangc1288cf2019-12-17 14:58:00 -0800673 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800674 self.run('stop servod PORT=%d' % self.servo_port,
675 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800676 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700677 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
678 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800679
680
681 def restart_servod(self, quick_startup=False):
682 """Restart the servod process on servohost.
683 """
684 self.stop_servod()
685 self.start_servod(quick_startup)
686
Garry Wang11b5e872020-03-11 15:14:08 -0700687
Garry Wangffbd2162020-04-17 16:13:48 -0700688 def _process_servodtool_error(self, response):
689 """Helper function to handle non-zero servodtool response.
690 """
691 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700692 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700693 logging.error('The servo is not plugged on a usb hub that supports'
694 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700695 # change the flag so we can update this label in later process.
696 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700697 return
698
699 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
700 self.servo_serial, response.stdout):
701 logging.error('No servo with serial %s found!', self.servo_serial)
702 return
703
704 logging.error('Unexpected error occurred from usbhub control, please'
705 ' file a bug and inform chrome-fleet-software@ team!')
706
Otabek Kasimov545739c2020-08-20 00:24:21 -0700707 def _is_usbc_pigtail_connection_timeout(self):
708 """Check if servo has issue with USBC pigtail connection timeout.
709
710 The usb_console has to be clean for good servo. If console generate
711 messages like (below) then issue is present:
712 [475635.427072 PD TMOUT RX 1/1]
713 RXERR1 Preamble
714 [475635.476044 PD TMOUT RX 1/1]
715 RXERR1 Preamble
716 """
717 if not self.servo_serial:
718 return False
719 logging.debug('Starting check if USBC pigtail connection timeout.')
720 try:
721 cmd = 'usb_console -d 18d1:501b -s %s' % self.servo_serial
722 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
723 result_lines = resp.stdout.splitlines()
724 for line in result_lines:
725 if re.match(self.USBC_PIGTAIL_TIMEOUT_RE, line):
726 return True
727 except Exception as e:
728 logging.debug('(Non-critical) %s.', e)
729 return False
730
731 def _reset_usbc_pigtail_connection(self):
732 """Reset USBC pigtail connection on servo board.
733
734 To reset need to run 'cc off' and then 'cc srcdts' in usb_console.
735 """
736 if not self.servo_serial:
737 return False
738 logging.debug('Starting reset USBC pigtail connection.')
739 def _run_command(cc_command):
740 """Run configuration chanel commands.
741
742 @returns: True if pas successful and False if fail.
743 """
744 try:
745 cmd = (r"echo 'cc %s' | usb_console -d 18d1:501b -s %s"
746 % (cc_command, self.servo_serial))
747 resp = self.run(cmd, timeout=self.DEFAULT_TERMINAL_TIMEOUT)
748 return True
749 except Exception as e:
750 logging.info('(Non-critical) %s.', e)
751 return False
752
753 logging.info('Turn off configuration channel. And wait 5 seconds.')
754 if _run_command('off'):
755 # wait till command will be effected
756 time.sleep(5)
757 logging.info('Turn on configuration channel. '
758 'And wait 15 seconds.')
759 if _run_command('srcdts'):
760 # wait till command will be effected
761 time.sleep(15)
762
763 def reset_usbc_pigtail_connection_on_need(self):
764 """Reset USBC pitgtail issue if it present."""
765 if not self.is_labstation():
766 logging.info('USBC pigtail reset applicable only for labstations')
767 return
768
769 if self._is_usbc_pigtail_connection_timeout():
770 logging.info('USBC pigtail issue detected on servo.')
771 self._reset_usbc_pigtail_connection()
772 fields = self._get_host_metrics_data()
773 fields['success'] = not self._is_usbc_pigtail_connection_timeout()
774 metrics.Counter(
775 'chromeos/autotest/repair/servo_usbc/reset'
776 ).increment(fields=fields)
Garry Wangffbd2162020-04-17 16:13:48 -0700777
778 def _get_servo_usb_devnum(self):
779 """Helper function to collect current usb devnum of servo.
780 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700781 # TODO remove try-except when fix crbug.com/1087964
782 try:
783 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
784 resp = self.run(cmd, ignore_status=True, timeout=30)
785 except Exception as e:
786 # Here we catch only timeout errors.
787 # Other errors is filtered by ignore_status=True
788 logging.debug('Attempt to get servo usb-path failed due to '
789 'timeout; %s', e)
790 return ''
791
Garry Wangffbd2162020-04-17 16:13:48 -0700792 if resp.exit_status != 0:
793 self._process_servodtool_error(resp)
794 return ''
795 usb_path = resp.stdout.strip()
796 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
797
798 resp = self.run('cat %s/devnum' % usb_path,
799 ignore_status=True)
800 if resp.exit_status != 0:
801 self._process_servodtool_error(resp)
802 return ''
803 return resp.stdout.strip()
804
Garry Wang358aad42020-08-02 20:56:04 -0700805 def reboot_servo_v3_on_need(self):
806 """Check and reboot servo_v3 based on below conditions.
807 1. If there is an update pending on reboot.
808 2. Servo_v3 has been up for more than 96 hours.
809 """
810 if self.get_board() != 'beaglebone_servo':
811 logging.info('Servo reboot is only applicable for servo V3.')
Otabek Kasimove6df8102020-07-21 20:15:25 -0700812 return
813
Garry Wang358aad42020-08-02 20:56:04 -0700814 update_pending_reboot = (self._check_update_status() ==
815 self.UPDATE_STATE.PENDING_REBOOT)
816 uptime_hours = float(self.check_uptime())/3600
817 logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
818 long_up_time = uptime_hours > 96
819
820 # Skip reboot if neither condition are met.
821 if not (update_pending_reboot or long_up_time):
Otabek Kasimove6df8102020-07-21 20:15:25 -0700822 return
823
Garry Wang358aad42020-08-02 20:56:04 -0700824 if update_pending_reboot:
825 message = 'Starting reboot servo_v3 because an update is pending.'
826 reboot_method = self._post_update_reboot
827 elif long_up_time:
828 message = 'Starting reboot servo_v3 because uptime > 96 hours.'
829 reboot_method = self._servo_host_reboot
830 self.record('INFO', None, None, message)
831 logging.info(message)
Otabek Kasimove6df8102020-07-21 20:15:25 -0700832 try:
Garry Wang358aad42020-08-02 20:56:04 -0700833 reboot_method()
Otabek Kasimove6df8102020-07-21 20:15:25 -0700834 message = 'Servo_v3 reboot completed successfully.'
835 except Exception as e:
836 logging.debug("Fail to reboot servo_v3; %s", e)
837 message = ('Servo_v3 reboot failed, please check debug log '
838 'for details.')
839 logging.info(message)
840 self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700841
842 def _reset_servo(self):
843 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700844 # TODO remove try-except when fix crbug.com/1087964
845 try:
846 resp = self.run('servodtool device -s %s power-cycle' %
847 self.servo_serial, ignore_status=True,
848 timeout=30)
849 if resp.exit_status != 0:
850 self._process_servodtool_error(resp)
851 return False
852 except Exception as e:
853 # Here we catch only timeout errors.
854 # Other errors is filtered by ignore_status=True
855 logging.debug('Attempt to reset servo failed due to timeout;'
856 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700857 return False
858
859 logging.debug('Wait %s seconds for servo to come back from reset.',
860 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
861 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700862 # change the flag so we can update this label in later process.
863 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700864 return True
865
866
867 def reset_servo(self):
868 """Reset(power-cycle) the servo via smart usbhub.
869 """
870 if not self.is_labstation():
871 logging.info('Servo reset is not applicable to servo_v3.')
872 return
873
874 pre_reset_devnum = self._get_servo_usb_devnum()
875 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
876 result = self._reset_servo()
877 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700878 message = ('Failed to reset servo with serial: %s. (Please ignore'
879 ' this error if the DUT is not connected to a smart'
880 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700881 logging.warning(message)
882 self.record('INFO', None, None, message)
883 return
884
885 post_reset_devnum = self._get_servo_usb_devnum()
886 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
887 if not (pre_reset_devnum and post_reset_devnum):
888 message = ('Servo reset completed but unable to verify'
889 ' devnum change!')
890 elif pre_reset_devnum != post_reset_devnum:
891 message = ('Reset servo with serial %s completed successfully!'
892 % self.servo_serial)
893 else:
894 message = 'Servo reset completed but devnum is still not changed!'
895 logging.info(message)
896 self.record('INFO', None, None, message)
897
898
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800899 def _extract_compressed_logs(self, logdir, relevant_files):
900 """Decompress servod logs in |logdir|.
901
902 @param logdir: directory containing compressed servod logs.
903 @param relevant_files: list of files in |logdir| to consider.
904
905 @returns: tuple, (tarfiles, files) where
906 tarfiles: list of the compressed filenames that have been
907 extracted and deleted
908 files: list of the uncompressed files that were generated
909 """
910 # For all tar-files, first extract them to the directory, and
911 # then let the common flow handle them.
912 tarfiles = [cf for cf in relevant_files if
913 cf.endswith(self.COMPRESSION_SUFFIX)]
914 files = []
915 for f in tarfiles:
916 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
917 with tarfile.open(f) as tf:
918 # Each tarfile has only one member, as
919 # that's the compressed log.
920 member = tf.members[0]
921 # Manipulate so that it only extracts the basename, and not
922 # the directories etc.
923 member.name = norm_name
924 files.append(os.path.join(logdir, member.name))
925 tf.extract(member, logdir)
926 # File has been extracted: remove the compressed file.
927 os.remove(f)
928 return tarfiles, files
929
930 def _extract_mcu_logs(self, log_subdir):
931 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
932
933 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
934 lines from the logs to generate invidiual console logs e.g. after
935 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
936 those MCUs had any console input/output.
937
938 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
939 """
940 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
941 # files
942 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
943 if not os.path.exists(mcu_lines_file):
944 logging.info('No DEBUG logs found to extract MCU logs from.')
945 return
946 mcu_files = {}
947 mcu_file_template = '%s.txt'
948 with open(mcu_lines_file, 'r') as f:
949 for line in f:
950 match = self.MCU_EXTRACTOR.match(line)
951 if match:
952 mcu = match.group(self.MCU_GROUP).lower()
953 line = match.group(self.LINE_GROUP)
954 if mcu not in mcu_files:
955 mcu_file = os.path.join(log_subdir,
956 mcu_file_template % mcu)
957 mcu_files[mcu] = open(mcu_file, 'a')
958 fd = mcu_files[mcu]
959 fd.write(line + '\n')
960 for f in mcu_files:
961 mcu_files[f].close()
962
963
964 def remove_latest_log_symlinks(self):
965 """Remove the conveninence symlinks 'latest' servod logs."""
966 symlink_wildcard = '%s/latest*' % self.remote_log_dir
967 cmd = 'rm ' + symlink_wildcard
968 self.run(cmd, stderr_tee=None, ignore_status=True)
969
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700970 def probe_servod_restart(self, instance_ts, outdir):
971 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800972
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700973 If since the last time this host called start_servod() servod crashed
974 and restarted, this helper finds those logs as well, and stores them
975 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000976
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700977 It also issues a panicinfo command to servo devices after the restart
978 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800979
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700980 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800981 @param outdir: directory to create a subdirectory into to place the
982 servod logs into.
983 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700984 if self._initial_instance_ts is None:
985 logging.info('No log timestamp grabbed successfully on servod '
986 'startup. Cannot check device restarts. Ignoring.')
987 return
988 if instance_ts == self._initial_instance_ts:
989 logging.debug('Servod appears to have run without restarting')
990 return
991 # Servod seems to have restarted (at least once). |_initial_instance_ts|
992 # is the first timestamp, and instance_ts is the current timestamp. Find
993 # all timestamps in between them, and grab the logs for each.
994 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
995 instance_ts)
996 logging.info('Servod has restarted %d times between the start and the '
997 'end of this servo_host.', len(tss))
998 logging.info('This might be an issue. Will extract all logs from each '
999 'instance.')
1000 logging.info('Logs that are not the currently running (about to turn '
1001 'down) instance are maked with a .%s in their folder.',
1002 self.OLD_LOG_SUFFIX)
1003 for ts in tss:
1004 self.get_instance_logs(ts, outdir, old=True)
1005 # Lastly, servod has restarted due to a potential issue. Try to get
1006 # panic information from servo micro and servo v4 for the current logs.
1007 # This can only happen if the |_servo| attribute is initialized.
1008 if self._servo:
1009 for mcu in ['servo_micro', 'servo_v4']:
1010 ctrl = '%s_uart_cmd' % mcu
1011 if self._servo.has_control(ctrl):
1012 logging.info('Trying to retrieve %r panicinfo into logs',
1013 mcu)
1014 try:
1015 self._servo.set_nocheck(ctrl, 'panicinfo')
1016 except error.TestFail as e:
1017 logging.error('Failed to generate panicinfo for %r '
1018 'logs. %s', mcu, str(e))
1019
1020 def _find_instance_timestamps_between(self, start_ts, end_ts):
1021 """Find all log timestamps between [start_ts, end_ts).
1022
1023 @param start_ts: str, earliest log timestamp of interest
1024 @param end_ts: str, latest log timestamp of interest
1025
1026 @returns: list, all timestamps between start_ts and end_ts, end_ts
1027 exclusive, on the servo_host. An empty list on errors
1028 """
1029 # Simply get all timestamp, and then sort and remove
1030 cmd = 'ls %s' % self.remote_log_dir
1031 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1032 if res.exit_status != 0:
1033 # Here we failed to find anything.
1034 logging.info('Failed to find remote servod logs. Ignoring.')
1035 return []
1036 logfiles = res.stdout.strip().split()
1037 timestamps = set()
1038 for logfile in logfiles:
1039 ts_match = self.TS_EXTRACTOR.match(logfile)
1040 if not ts_match:
1041 # Simply ignore files that fail the check. It might be the
1042 # 'latest' symlinks or random files.
1043 continue
1044 timestamps.add(ts_match.group(self.TS_GROUP))
1045 # At this point we have all unique timestamps.
1046 timestamps = sorted(timestamps)
1047 for ts in [start_ts, end_ts]:
1048 if ts not in timestamps:
1049 logging.error('Timestamp %r not in servod logs. Cannot query '
1050 'for timestamps in between %r and %r', ts,
1051 start_ts, end_ts)
1052 return []
1053 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
1054
1055 def get_instance_logs_ts(self):
1056 """Retrieve the currently running servod instance's log timestamp
1057
1058 @returns: str, timestamp for current instance, or None on failure
1059 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001060 # First, extract the timestamp. This cmd gives the real filename of
1061 # the latest aka current log file.
1062 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
1063 'then realpath %(dir)s/latest.DEBUG;'
1064 'elif [ -f %(dir)s/latest ];'
1065 'then realpath %(dir)s/latest;'
1066 'else exit %(code)d;'
1067 'fi' % {'dir': self.remote_log_dir,
1068 'code': self.NO_SYMLINKS_CODE})
1069 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1070 if res.exit_status != 0:
1071 if res.exit_status == self.NO_SYMLINKS_CODE:
1072 logging.warning('servod log latest symlinks not found. '
1073 'This is likely due to an error starting up '
1074 'servod. Ignoring..')
1075 else:
1076 logging.warning('Failed to find servod logs on servo host.')
1077 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001078 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001079 fname = os.path.basename(res.stdout.strip())
1080 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -08001081 ts_match = self.TS_EXTRACTOR.match(fname)
1082 if not ts_match:
1083 logging.warning('Failed to extract timestamp from servod log file '
1084 '%r. Skipping. The servo host is using outdated '
1085 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001086 return None
1087 return ts_match.group(self.TS_GROUP)
1088
1089 def get_instance_logs(self, instance_ts, outdir, old=False):
1090 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
1091
1092 This method first collects all logs on the servo_host side pertaining
1093 to this servod instance (port, instatiation). It glues them together
1094 into combined log.[level].txt files and extracts all available MCU
1095 console I/O from the logs into individual files e.g. servo_v4.txt
1096
1097 All the output can be found in a directory inside |outdir| that
1098 this generates based on |LOG_DIR|, the servod port, and the instance
1099 timestamp on the servo_host side.
1100
1101 @param instance_ts: log timestamp to grab logfiles for
1102 @param outdir: directory to create a subdirectory into to place the
1103 servod logs into.
1104 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
1105 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001106 # Create the local results log dir.
1107 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
1108 str(self.servo_port),
1109 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001110 if old:
1111 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
1112 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001113 os.mkdir(log_dir)
1114 # Now, get all files with that timestamp.
1115 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
1116 instance_ts)
1117 res = self.run(cmd, stderr_tee=None, ignore_status=True)
1118 files = res.stdout.strip().split()
1119 try:
1120 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001121 if not os.listdir(log_dir):
1122 logging.info('No servod logs retrieved. Ignoring, and removing '
1123 '%r again.', log_dir)
1124 os.rmdir(log_dir)
1125 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001126 except error.AutoservRunError as e:
1127 result = e.result_obj
1128 if result.exit_status != 0:
1129 stderr = result.stderr.strip()
1130 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
1131 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001132 # Remove the log_dir as nothing was added to it.
1133 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001134 return
1135 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
1136 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
1137 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
1138 # Create the joint files for each loglevel. i.e log.DEBUG
1139 joint_file = self.JOINT_LOG_PREFIX
1140 if level_name:
1141 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
1142 # This helps with some online tools to avoid complaints about an
1143 # unknown filetype.
1144 joint_file = joint_file + '.txt'
1145 joint_path = os.path.join(log_dir, joint_file)
1146 files = [f for f in local_files if level_name in f]
1147 if not files:
1148 # TODO(crrev.com/c/1793030): remove no-level case once CL
1149 # is pushed
1150 continue
1151 # Extract compressed logs if any.
1152 compressed, extracted = self._extract_compressed_logs(log_dir,
1153 files)
1154 files = list(set(files) - set(compressed))
1155 files.extend(extracted)
1156 # Need to sort. As they all share the same timestamp, and
1157 # loglevel, the index itself is sufficient. The highest index
1158 # is the oldest file, therefore we need a descending sort.
1159 def sortkey(f, level=level_name):
1160 """Custom sortkey to sort based on rotation number int."""
1161 if f.endswith(level_name): return 0
1162 return int(f.split('.')[-1])
1163
1164 files.sort(reverse=True, key=sortkey)
1165 # Just rename the first file rather than building from scratch.
1166 os.rename(files[0], joint_path)
1167 with open(joint_path, 'a') as joint_f:
1168 for logfile in files[1:]:
1169 # Transfer the file to the joint file line by line.
1170 with open(logfile, 'r') as log_f:
1171 for line in log_f:
1172 joint_f.write(line)
1173 # File has been written over. Delete safely.
1174 os.remove(logfile)
1175 # Need to remove all files form |local_files| so we don't
1176 # analyze them again.
1177 local_files = list(set(local_files) - set(files) - set(compressed))
1178 # Lastly, extract MCU logs from the joint logs.
1179 self._extract_mcu_logs(log_dir)
1180
Garry Wang79e9af62019-06-12 15:19:19 -07001181 def _lock(self):
1182 """lock servohost by touching a file.
1183 """
1184 logging.debug('Locking servohost %s by touching %s file',
1185 self.hostname, self._lock_file)
1186 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001187 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001188
1189
1190 def _unlock(self):
1191 """Unlock servohost by removing the lock file.
1192 """
1193 logging.debug('Unlocking servohost by removing %s file',
1194 self._lock_file)
1195 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001196 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001197
1198
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001199 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001200 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001201 # NOTE: throughout this method there are multiple attempts to catch
1202 # all errors. This is WAI as log grabbing should not fail tests.
1203 # However, the goal is to catch and handle/process all errors, thus
1204 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001205 if self._closed:
1206 logging.debug('ServoHost is already closed.')
1207 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001208 instance_ts = self.get_instance_logs_ts()
1209 # TODO(crbug.com/1011516): once enabled, remove the check against
1210 # localhost and instead check against log-rotiation enablement.
1211 logs_available = (instance_ts is not None and
1212 self.job and
1213 not self.is_localhost())
1214 if logs_available:
1215 # Probe whether there was a servod restart, and grab those old
1216 # logs as well.
1217 try:
1218 self.probe_servod_restart(instance_ts, self.job.resultdir)
1219 except (error.AutoservRunError, error.TestFail) as e:
1220 logging.info('Failed to grab servo logs due to: %s. '
1221 'This error is forgiven.', str(e))
1222 except Exception as e:
1223 logging.error('Unexpected error probing for old logs. %s. '
1224 'Forgiven. Please file a bug and fix or catch '
1225 'in log probing function', str(e),
1226 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001227 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001228 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001229 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001230 self._servo.close(outdir)
1231
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001232 if logs_available:
1233 # Grab current (not old like above) logs after the servo instance
1234 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001235 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001236 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001237 except error.AutoservRunError as e:
1238 logging.info('Failed to grab servo logs due to: %s. '
1239 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001240 except Exception as e:
1241 logging.error('Unexpected error grabbing servod logs. %s. '
1242 'Forgiven. Please file a bug and fix or catch '
1243 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001244
Garry Wang7c00b0f2019-06-25 17:28:17 -07001245 if self._is_locked:
1246 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001247 try:
1248 self._unlock()
1249 except error.AutoservSSHTimeout:
1250 logging.error('Unlock servohost failed due to ssh timeout.'
1251 ' It may caused by servohost went down during'
1252 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001253 # We want always stop servod after task to minimum the impact of bad
1254 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001255 try:
1256 self.stop_servod()
1257 except error.AutoservRunError as e:
1258 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001259 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001260
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001261 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001262 # Mark closed.
1263 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001264
1265
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001266 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001267 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001268
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001269 def _get_host_metrics_data(self):
1270 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001271 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001272 'board': self.servo_board or ''}
1273
1274 def _is_servo_device_connected(self, servo_type, serial):
1275 """Check if device is connected to the labstation.
1276
1277 Works for all servo devices connected to the labstation.
1278 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1279
1280 @param servo_type: The type of servo device. Expecting value can be
1281 servo_v4 or servo_micro.
1282 @param serial: The serial number of the device to detect it.
1283 """
1284 vid_pid = self.SERVO_VID_PID.get(servo_type)
1285 if not vid_pid or not serial:
1286 # device cannot detected without VID/PID or serial number
1287 return False
1288 logging.debug('Started to detect %s', servo_type)
1289 try:
1290 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1291 result = self.run(cmd, ignore_status=True, timeout=30)
1292 if result.exit_status == 0 and result.stdout.strip():
1293 logging.debug('The %s is plugged in to the host.', servo_type)
1294 return True
1295 logging.debug('%s device is not detected; %s', servo_type, result)
1296 return False
1297 except Exception as e:
1298 # can be triggered by timeout issue due running the script
1299 metrics.Counter(
1300 'chromeos/autotest/repair/servo_detection/timeout'
1301 ).increment(fields=self._get_host_metrics_data())
1302 logging.error('%s device is not detected; %s', servo_type, str(e))
1303 return None
1304
1305 def _is_servo_board_present_on_servo_v3(self):
1306 """Check if servo board is detected on servo_v3"""
1307 vid_pids = self.SERVO_VID_PID['servo_v3']
1308 if not vid_pids or len(vid_pids) == 0:
1309 # device cannot detected without VID/PID
1310 return False
1311 logging.debug('Started to detect servo board on servo_v3')
1312 not_detected = 'The servo board is not detected on servo_v3'
1313 try:
1314 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1315 result = self.run(cmd, ignore_status=True, timeout=30)
1316 if result.exit_status == 0 and result.stdout.strip():
1317 logging.debug('The servo board is detected on servo_v3')
1318 return True
1319 logging.debug('%s; %s', not_detected, result)
1320 return False
1321 except Exception as e:
1322 # can be triggered by timeout issue due running the script
1323 metrics.Counter(
1324 'chromeos/autotest/repair/servo_detection/timeout'
1325 ).increment(fields=self._get_host_metrics_data())
1326 logging.error('%s; %s', not_detected, str(e))
1327 return None
1328
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001329 def _is_main_device_not_detected_on_servo_v4(self):
1330 """Check if servod cannot find main device on servo.
1331
1332 The check based on reading servod logs for servo_v4.
1333 """
1334 if not self._initial_instance_ts:
1335 # latest log not found
1336 return False
1337 logging.debug('latest log for servod created at %s',
1338 self._initial_instance_ts)
1339 try:
1340 log_created = calendar.timegm(time.strptime(
1341 self._initial_instance_ts,
1342 "%Y-%m-%d--%H-%M-%S.%f"))
1343 except ValueError as e:
1344 logging.debug('Cannot read time from log file name: %s',
1345 self._initial_instance_ts)
1346 return False
1347 min_time_created = calendar.timegm(time.gmtime())
1348 if min_time_created > log_created + 3600:
1349 # the log file is old we cannot use it
1350 logging.debug('log file was created more than hour ago, too old')
1351 return False
1352 logging.debug('latest log was created not longer then 1 hour ago')
1353
1354 # check if servod can detect main device by servo_v4
1355 message = 'ERROR - No servo micro or CCD detected for board'
1356 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1357 % (self.servo_port, self._initial_instance_ts, message))
1358 result = self.run(cmd, ignore_status=True)
1359 if result.stdout.strip():
1360 logging.info('Servod cannot detect main device on the servo; '
1361 'Can be caused by bad hardware of servo or '
1362 'issue on the DUT side.')
1363 return True
1364 logging.debug('The main device is detected')
1365 return False
1366
Otabek Kasimov15963492020-06-23 21:10:51 -07001367 def get_verify_state(self, tag):
1368 """Return the state of servo verifier.
1369
1370 @returns: bool or None
1371 """
1372 return self._repair_strategy.verifier_is_good(tag)
1373
1374 def determine_servo_state(self):
1375 """Determine servo state based on the failed verifier.
1376
1377 @returns: servo state value
1378 The state detecting based on first fail verifier or collecting of
1379 them.
1380 """
1381 ssh = self.get_verify_state('servo_ssh')
1382 disk_space = self.get_verify_state('disk_space')
1383 start_servod = self.get_verify_state('servod_job')
1384 create_servo = self.get_verify_state('servod_connection')
1385 init_servo = self.get_verify_state('servod_control')
1386 pwr_button = self.get_verify_state('pwr_button')
1387 lid_open = self.get_verify_state('lid_open')
1388 ec_board = self.get_verify_state('ec_board')
1389 ccd_testlab = self.get_verify_state('ccd_testlab')
1390
1391 if not ssh:
1392 return servo_constants.SERVO_STATE_NO_SSH
1393
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001394 if (start_servod == self.VERIFY_FAILED
1395 or create_servo == self.VERIFY_FAILED):
1396 # sometimes servo can start with out present servo
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001397 if self.is_labstation():
1398 if not self.servo_serial:
1399 return servo_constants.SERVO_STATE_WRONG_CONFIG
1400 if self._is_servo_device_connected(
1401 'servo_v4',
1402 self.servo_serial) == False:
1403 return servo_constants.SERVO_STATE_NOT_CONNECTED
1404 elif self._is_servo_board_present_on_servo_v3() == False:
1405 return servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001406
1407 if start_servod == self.VERIFY_FAILED:
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001408 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1409
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001410 if create_servo == self.VERIFY_FAILED:
1411 if (self.is_labstation()
1412 and self._is_main_device_not_detected_on_servo_v4()):
1413 servo_type = None
1414 if self.get_dut_host_info():
1415 servo_type = self.get_dut_host_info().get_label_value(
1416 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1417 if servo_type and 'servo_micro' in servo_type:
1418 serial = self.get_servo_micro_serial_number()
1419 logging.debug('servo_micro serial: %s', serial)
1420 if self._is_servo_device_detected('servo_micro',
1421 serial):
1422 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1423 # Device can be not detected because of DUT
1424 # TODO (otabek) update after b/159755652 and b/159754985
1425 metrics.Counter(
1426 'chromeos/autotest/repair/servo_state/needs_replacement'
1427 ).increment(fields=self._get_host_metrics_data())
1428 elif not self.is_labstation():
1429 # Here need logic to check if flex cable is connected
1430 pass
1431
Otabek Kasimov15963492020-06-23 21:10:51 -07001432 # one of the reason why servo can not initialized
1433 if ccd_testlab == self.VERIFY_FAILED:
1434 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1435
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001436 if (create_servo == self.VERIFY_FAILED
1437 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001438 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1439
Otabek Kasimov015c15c2020-08-20 00:40:42 -07001440 if ec_board == self.VERIFY_FAILED:
1441 return servo_constants.SERVO_STATE_EC_BROKEN
Otabek Kasimov15963492020-06-23 21:10:51 -07001442 if pwr_button == self.VERIFY_FAILED:
1443 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1444 if lid_open == self.VERIFY_FAILED:
1445 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
Otabek Kasimov15963492020-06-23 21:10:51 -07001446
Otabek Kasimov15963492020-06-23 21:10:51 -07001447 metrics.Counter(
1448 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001449 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001450 logging.info('We do not have special state for this failure yet :)')
1451 return servo_constants.SERVO_STATE_BROKEN
1452
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001453
Richard Barnetteea3e4602016-06-10 12:36:41 -07001454def make_servo_hostname(dut_hostname):
1455 """Given a DUT's hostname, return the hostname of its servo.
1456
1457 @param dut_hostname: hostname of a DUT.
1458
1459 @return hostname of the DUT's servo.
1460
1461 """
1462 host_parts = dut_hostname.split('.')
1463 host_parts[0] = host_parts[0] + '-servo'
1464 return '.'.join(host_parts)
1465
1466
1467def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001468 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001469
1470 @param servo_hostname: hostname of the servo host.
1471
1472 @return True if it's up, False otherwise
1473 """
1474 # Technically, this duplicates the SSH ping done early in the servo
1475 # proxy initialization code. However, this ping ends in a couple
1476 # seconds when if fails, rather than the 60 seconds it takes to decide
1477 # that an SSH ping has timed out. Specifically, that timeout happens
1478 # when our servo DNS name resolves, but there is no host at that IP.
1479 logging.info('Pinging servo host at %s', servo_hostname)
1480 ping_config = ping_runner.PingConfig(
1481 servo_hostname, count=3,
1482 ignore_result=True, ignore_status=True)
1483 return ping_runner.PingRunner().ping(ping_config).received > 0
1484
1485
Richard Barnettee519dcd2016-08-15 17:37:17 -07001486def _map_afe_board_to_servo_board(afe_board):
1487 """Map a board we get from the AFE to a servo appropriate value.
1488
1489 Many boards are identical to other boards for servo's purposes.
1490 This function makes that mapping.
1491
1492 @param afe_board string board name received from AFE.
1493 @return board we expect servo to have.
1494
1495 """
1496 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1497 BOARD_MAP = {'gizmo': 'panther'}
1498 mapped_board = afe_board
1499 if afe_board in BOARD_MAP:
1500 mapped_board = BOARD_MAP[afe_board]
1501 else:
1502 for suffix in KNOWN_SUFFIXES:
1503 if afe_board.endswith(suffix):
1504 mapped_board = afe_board[0:-len(suffix)]
1505 break
1506 if mapped_board != afe_board:
1507 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1508 return mapped_board
1509
1510
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001511def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001512 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001513
Richard Barnetteea3e4602016-06-10 12:36:41 -07001514 @param dut_host Instance of `Host` on which to find the servo
1515 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001516 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001517 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001518 info = dut_host.host_info_store.get()
1519 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001520 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001521
Garry Wang11b5e872020-03-11 15:14:08 -07001522 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001523 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001524 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1525 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001526 except ValueError:
1527 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001528 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001529 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001530 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001531
1532 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001533 servo_board = _map_afe_board_to_servo_board(info.board)
1534 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001535 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001536 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1537 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001538
1539
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001540def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001541 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1542 in ['localhost', '127.0.0.1']):
1543 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001544 'SSP', 'host_container_ip', type=str, default=None)
1545
1546
Dan Shi023aae32016-05-25 11:13:01 -07001547def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001548 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001549 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001550
Richard Barnette9a26ad62016-06-10 12:03:08 -07001551 This function attempts to create and verify or repair a `ServoHost`
1552 object for a servo connected to the given `dut`, subject to various
1553 constraints imposed by the parameters:
1554 * When the `servo_args` parameter is not `None`, a servo
1555 host must be created, and must be checked with `repair()`.
1556 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1557 true:
1558 * If `try_servo_repair` is true, then create a servo host and
1559 check it with `repair()`.
1560 * Otherwise, if the servo responds to `ping` then create a
1561 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001562
Richard Barnette9a26ad62016-06-10 12:03:08 -07001563 In cases where `servo_args` was not `None`, repair failure
1564 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001565 are logged and then discarded. Note that this only happens in cases
1566 where we're called from a test (not special task) control file that
1567 has an explicit dependency on servo. In that case, we require that
1568 repair not write to `status.log`, so as to avoid polluting test
1569 results.
1570
1571 TODO(jrbarnette): The special handling for servo in test control
1572 files is a thorn in my flesh; I dearly hope to see it cut out before
1573 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001574
1575 Parameters for a servo host consist of a host name, port number, and
1576 DUT board, and are determined from one of these sources, in order of
1577 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001578 * Servo attributes from the `dut` parameter take precedence over
1579 all other sources of information.
1580 * If a DNS entry for the servo based on the DUT hostname exists in
1581 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001582 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001583 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001584 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001585
1586 @param dut An instance of `Host` from which to take
1587 servo parameters (if available).
1588 @param servo_args A dictionary with servo parameters to use if
1589 they can't be found from `dut`. If this
1590 argument is supplied, unrepaired exceptions
1591 from `verify()` will be passed back to the
1592 caller.
1593 @param try_lab_servo If not true, servo host creation will be
1594 skipped unless otherwise required by the
1595 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001596 @param try_servo_repair If true, check a servo host with
1597 `repair()` instead of `verify()`.
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001598 @param dut_host_info: A HostInfo object of the DUT that connected
1599 to this servo.
Dan Shi4d478522014-02-14 13:46:32 -08001600
1601 @returns: A ServoHost object or None. See comments above.
1602
1603 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001604 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001605 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001606 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001607 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001608 if utils.in_moblab_ssp():
1609 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001610 logging.debug(
1611 'Overriding provided servo_args (%s) with arguments'
1612 ' determined from the host (%s)',
1613 servo_args,
1614 servo_args_override,
1615 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001616 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001617
Richard Barnetteea3e4602016-06-10 12:36:41 -07001618 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001619 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001620 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001621 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001622 else:
1623 # For regular test case which not required the servo
1624 return None, None
1625
Garry Wang11b5e872020-03-11 15:14:08 -07001626 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1627 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001628 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1629 logging.debug(
1630 'Servo connection info missed hostname: %s , port: %s',
1631 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001632 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001633 if not is_servo_host_information_valid(servo_hostname, servo_port):
1634 logging.debug(
1635 'Servo connection info is incorrect hostname: %s , port: %s',
1636 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001637 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001638 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001639 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001640 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001641 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001642
Garry Wangebc015b2019-06-06 17:45:06 -07001643 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001644
Otabek Kasimove6df8102020-07-21 20:15:25 -07001645 # Reset or reboot servo device only during AdminRepair tasks.
1646 if try_servo_repair:
1647 if newhost._is_locked:
1648 # Reset servo if the servo is locked, as we check if the servohost
1649 # is up, if the servohost is labstation and if the servohost is in
1650 # lab inside the locking logic.
Otabek Kasimov545739c2020-08-20 00:24:21 -07001651 newhost.reset_usbc_pigtail_connection_on_need()
Otabek Kasimove6df8102020-07-21 20:15:25 -07001652 newhost.reset_servo()
1653 else:
Garry Wang358aad42020-08-02 20:56:04 -07001654 try:
1655 newhost.reboot_servo_v3_on_need()
Garry Wang1f0d5332020-08-10 19:32:32 -07001656 except Exception as e:
1657 logging.info('[Non-critical] Unexpected error while trying to'
1658 ' reboot servo_v3, skipping the reboot; %s', e)
Otabek Kasimove6df8102020-07-21 20:15:25 -07001659
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001660 if dut:
1661 newhost.set_dut_hostname(dut.hostname)
Otabek Kasimov9e90ae12020-08-14 03:01:19 -07001662 if dut_host_info:
1663 newhost.set_dut_host_info(dut_host_info)
Garry Wangffbd2162020-04-17 16:13:48 -07001664
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001665 if try_lab_servo or try_servo_repair:
1666 try:
1667 logging.info("Check and update servo firmware.")
1668 servo_updater.update_servo_firmware(
1669 newhost,
1670 force_update=False)
1671 except Exception as e:
1672 logging.error("Servo device update error: %s", e)
1673
Garry Wangcdd27b22020-01-13 14:59:11 -08001674 try:
1675 newhost.restart_servod(quick_startup=True)
1676 except error.AutoservSSHTimeout:
1677 logging.warning("Restart servod failed due ssh connection "
1678 "to servohost timed out. This error is forgiven"
1679 " here, we will retry in servo repair process.")
1680 except error.AutoservRunError as e:
1681 logging.warning("Restart servod failed due to:\n%s\n"
1682 "This error is forgiven here, we will retry"
1683 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001684
Richard Barnette9a26ad62016-06-10 12:03:08 -07001685 # Note that the logic of repair() includes everything done
1686 # by verify(). It's sufficient to call one or the other;
1687 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001688 if servo_dependency:
1689 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001690 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001691
1692 if try_servo_repair:
1693 try:
1694 newhost.repair()
1695 except Exception:
1696 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001697 else:
1698 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001699 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001700 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001701 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001702 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001703
1704
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001705def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001706 if hostname is None or len(hostname.strip()) == 0:
1707 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001708 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001709 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001710 if not type(port) is int:
1711 try:
1712 int(port)
1713 except ValueError:
1714 return False
1715
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001716 return True
1717
1718
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001719def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001720 """Check if provided servo attributes are valid.
1721
1722 @param hostname Hostname of the servohost.
1723 @param port servo port number.
1724
1725 @returns: A bool value to indicate if provided servo attribute valid.
1726 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001727 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001728 return False
1729 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001730 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001731 if port_int < 1 or port_int > 65000:
1732 return False
1733 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001734 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001735 return False
1736 return True