blob: e66a95adc5914d0005332d121d880dbd4ee30852 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Andrew McRaef0679932020-08-13 09:15:23 +100016import threading
Garry Wangc1288cf2019-12-17 14:58:00 -080017import time
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Otabek Kasimov120b6fa2020-07-03 00:15:27 -070019import calendar
Fang Deng5d518f42013-08-02 14:04:32 -070020
21from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070022from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070024from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070025from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070026from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070027from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000028from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070029from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070030from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070031from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070032from autotest_lib.client.common_lib import global_config
Otabek Kasimov8475cce2020-07-14 12:11:31 -070033from autotest_lib.site_utils.admin_audit import servo_updater
Garry Wangd7367482020-02-27 13:52:40 -080034
Otabek Kasimov15963492020-06-23 21:10:51 -070035try:
36 from chromite.lib import metrics
37except ImportError:
38 metrics = utils.metrics_mock
39
Dan Shi3b2adf62015-09-02 17:46:54 -070040_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070041
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070042
Garry Wangebc015b2019-06-06 17:45:06 -070043class ServoHost(base_servohost.BaseServoHost):
44 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070045 that with a servo instance for a specific port.
46
47 @type _servo: servo.Servo | None
48 """
Fang Deng5d518f42013-08-02 14:04:32 -070049
Raul E Rangel52ca2e82018-07-03 14:10:14 -060050 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070051
Dan Shie5b3c512014-08-21 12:12:09 -070052 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070053 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070054
xixuan6cf6d2f2016-01-29 15:29:00 -080055 # Ready test function
56 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070057
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080058 # Directory prefix on the servo host where the servod logs are stored.
59 SERVOD_LOG_PREFIX = '/var/log/servod'
60
61 # Exit code to use when symlinks for servod logs are not found.
62 NO_SYMLINKS_CODE = 9
63
64 # Directory in the job's results directory to dump the logs into.
65 LOG_DIR = 'servod'
66
67 # Prefix for joint loglevel files in the logs.
68 JOINT_LOG_PREFIX = 'log'
69
70 # Regex group to extract timestamp from logfile name.
71 TS_GROUP = 'ts'
72
73 # This regex is used to extract the timestamp from servod logs.
74 # files always start with log.
75 TS_RE = (r'log.'
76 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
77 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
78 # The loglevel is optional depending on labstation version.
79 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
80 TS_EXTRACTOR = re.compile(TS_RE)
81
82 # Regex group to extract MCU name from logline in servod logs.
83 MCU_GROUP = 'mcu'
84
85 # Regex group to extract logline from MCU logline in servod logs.
86 LINE_GROUP = 'line'
87
88 # This regex is used to extract the mcu and the line content from an
89 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
90 # Here is an example log-line:
91 #
92 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
93 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
94 #
95 # Here is conceptually how they are formatted:
96 #
97 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
98 # <output>
99 #
100 # The log format starts with a timestamp
101 MCU_RE = (r'[\d\-]+ [\d:,]+ '
102 # The mcu that is logging this is next.
103 r'- (?P<%s>\w+) - '
104 # Next, we have more log outputs before the actual line.
105 # Information about the file line, logging function etc.
106 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
107 # NOTE: if the log format changes, this regex needs to be
108 # adjusted.
109 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
110 # Lastly, we get the MCU's console line.
111 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
112 MCU_EXTRACTOR = re.compile(MCU_RE)
113
114 # Suffix to identify compressed logfiles.
115 COMPRESSION_SUFFIX = '.tbz2'
116
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700117 # A suffix to mark servod log directories that came from instance that
118 # ran during this servo_host, but are not the last one running e.g. when
119 # an instance (on purpose, or due to a bug) restarted in the middle of the
120 # run.
121 OLD_LOG_SUFFIX = 'old'
122
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700123 # Mapping servo board with their vid-pid
124 SERVO_VID_PID = {
125 'servo_v4':'18d1:501b',
126 'ccd_cr50':'18d1:5014',
127 'servo_micro':'18d1:501a',
128 'servo_v3':['18d1:5004', '0403:6014'],
129 }
130
Otabek Kasimov15963492020-06-23 21:10:51 -0700131 # States of verifiers
132 # True - verifier run and passed
133 # False - verifier run and failed
134 # None - verifier did not run or dependency failed
135 VERIFY_SUCCESS = True
136 VERIFY_FAILED = False
137 VERIFY_NOT_RUN = None
138
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800139 def _init_attributes(self):
140 self._servo_state = None
141 self.servo_port = None
142 self.servo_board = None
143 self.servo_model = None
144 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700145 # The flag that indicate if a servo is connected to a smart usbhub.
146 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
147 # get replaced.
148 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800149 self._servo = None
Andrew McRaef0679932020-08-13 09:15:23 +1000150 self._tunnel_proxy = None
151 self._tunnel_proxy_lock = threading.Lock()
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700152 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800153 # Flag to make sure that multiple calls to close do not result in the
154 # logic executing multiple times.
155 self._closed = False
Andrew McRaef0679932020-08-13 09:15:23 +1000156 # Per-thread local data
157 self._local = threading.local()
Fang Deng5d518f42013-08-02 14:04:32 -0700158
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700159 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700160 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700161 servo_model=None, servo_serial=None, is_in_lab=None,
162 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700163 """Initialize a ServoHost instance.
164
165 A ServoHost instance represents a host that controls a servo.
166
167 @param servo_host: Name of the host where the servod process
168 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600169 @param servo_port: Port the servod process is listening on. Defaults
170 to the SERVOD_PORT environment variable if set,
171 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700172 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700173 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800174 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
175 to None, for which utils.host_is_in_lab_zone will be
176 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700177
178 """
179 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700180 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800181 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700182 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700183 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700184 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700185 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800186
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800187 # The location of the log files on the servo host for this instance.
188 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
189 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700190 # Path of the servo host lock file.
191 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
192 + self.LOCK_FILE_POSTFIX)
193 # File path to declare a reboot request.
194 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
195 + self.REBOOT_FILE_POSTFIX)
196
197 # Lock the servo host if it's an in-lab labstation to prevent other
198 # task to reboot it until current task completes. We also wait and
199 # make sure the labstation is up here, in the case of the labstation is
200 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700201 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700202 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
203 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700204 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700205
Richard Barnette9a26ad62016-06-10 12:03:08 -0700206 self._repair_strategy = (
207 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700208
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700209 def __str__(self):
210 return "<%s '%s:%s'>" % (
211 type(self).__name__, self.hostname, self.servo_port)
212
Richard Barnette9a26ad62016-06-10 12:03:08 -0700213 def connect_servo(self):
Garry Wang8c8dc972020-06-09 13:41:51 -0700214 """ Initialize and setup servo for later use.
215 """
216 self.initilize_servo()
217 self.initialize_dut_for_servo()
218
219
220 def initilize_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700221 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700222
223 Initializes `self._servo` and then verifies that all network
224 connections are working. This will create an ssh tunnel if
225 it's required.
Garry Wang8c8dc972020-06-09 13:41:51 -0700226 """
227 self._servo = servo.Servo(servo_host=self,
228 servo_serial=self.servo_serial)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700229
Garry Wang8c8dc972020-06-09 13:41:51 -0700230
231 def initialize_dut_for_servo(self):
232 """This method will do some setup for dut control, e.g. setup
233 main servo_v4 device, and also testing the connection between servo
234 and DUT. As a side effect of testing the connection, all signals on
235 the target servo are reset to default values, and the USB stick is
Richard Barnette9a26ad62016-06-10 12:03:08 -0700236 set to the neutral (off) position.
237 """
Garry Wang8c8dc972020-06-09 13:41:51 -0700238 if not self._servo:
239 raise hosts.AutoservVerifyError('Servo object needs to be'
240 ' initialized before initialize'
241 ' DUT.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700242 timeout, _ = retry.timeout(
Garry Wang8c8dc972020-06-09 13:41:51 -0700243 self._servo.initialize_dut,
244 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700245 if timeout:
Garry Wang8c8dc972020-06-09 13:41:51 -0700246 raise hosts.AutoservVerifyError('Initialize dut for servo timed'
247 ' out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700248
249
250 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700251 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700252
253 If we've previously successfully connected to our servo,
254 disconnect any established ssh tunnel, and set `self._servo`
255 back to `None`.
256 """
257 if self._servo:
258 # N.B. This call is safe even without a tunnel:
259 # rpc_server_tracker.disconnect() silently ignores
260 # unknown ports.
261 self.rpc_server_tracker.disconnect(self.servo_port)
262 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700263
Garry Wangc1288cf2019-12-17 14:58:00 -0800264
Andrew McRaef0679932020-08-13 09:15:23 +1000265 def _maybe_create_servod_ssh_tunnel_proxy(self):
266 """Create a xmlrpc proxy for use with a ssh tunnel.
267 A lock is used to safely create a singleton proxy.
268 """
269 with self._tunnel_proxy_lock:
270 if self._tunnel_proxy is None:
271 self._tunnel_proxy = self.rpc_server_tracker.xmlrpc_connect(
272 None,
273 self.servo_port,
274 ready_test_name=self.SERVO_READY_METHOD,
275 timeout_seconds=60,
276 request_timeout_seconds=3600,
277 server_desc=str(self))
278
279
280 def get_servod_server_proxy(self):
281 """Return a proxy if it exists; otherwise, create a new one.
282 A proxy can either be a ssh tunnel based proxy, or a httplib
283 based proxy.
Fang Deng5d518f42013-08-02 14:04:32 -0700284
285 @returns: An xmlrpclib.ServerProxy that is connected to the servod
286 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700287 """
Garry Wang11b5e872020-03-11 15:14:08 -0700288 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
289 and not self.is_localhost()):
Andrew McRaef0679932020-08-13 09:15:23 +1000290 # Check for existing ssh tunnel proxy.
291 if self._tunnel_proxy is None:
292 self._maybe_create_servod_ssh_tunnel_proxy()
293 return self._tunnel_proxy
Richard Barnette9a26ad62016-06-10 12:03:08 -0700294 else:
Andrew McRaef0679932020-08-13 09:15:23 +1000295 # xmlrpc/httplib is not thread-safe, so each thread must have its
296 # own separate proxy connection.
297 if not hasattr(self._local, "_per_thread_proxy"):
298 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
299 self._local._per_thread_proxy = xmlrpclib.ServerProxy(remote)
300 return self._local._per_thread_proxy
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800301
302
Richard Barnette1edbb162016-11-01 11:47:50 -0700303 def verify(self, silent=False):
304 """Update the servo host and verify it's in a good state.
305
306 @param silent If true, suppress logging in `status.log`.
307 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700308 message = 'Beginning verify for servo host %s port %s serial %s'
309 message %= (self.hostname, self.servo_port, self.servo_serial)
310 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700311 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700312 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700313 self._servo_state = servo_constants.SERVO_STATE_WORKING
314 self.record('INFO', None, None,
315 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700316 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700317 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700318 self._servo_state = self.determine_servo_state()
319 self.record('INFO', None, None,
320 'ServoHost verify set servo_state as %s'
321 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700322 if self._is_critical_error(e):
323 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700324
325
Garry Wang7b0e1b72020-03-25 19:08:59 -0700326 def get_image_name_from_usbkey(self, usbkey_dev):
327 """Mount usb drive and check ChromeOS image name on it if there is
328 one. This method assumes the image_usbkey_direction is already set
329 to servo side.
330
331 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
332
333 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
334 or empty string if no test image detected, or unexpected
335 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700336 """
Garry Wang70e5d062020-04-03 18:01:05 -0700337 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700338 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
339 unmount_cmd = 'umount %s' % usb_mount_path
340 # ChromeOS root fs is in /dev/sdx3
341 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
342 # Unmount if there is an existing stale mount.
343 self.run(unmount_cmd, ignore_status=True)
344 # Create if the mount point is not existing.
345 self.run('mkdir -p %s' % usb_mount_path)
346 try:
347 # Attempt to mount the usb drive.
348 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700349 if mount_result.exit_status != 0:
350 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700351 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700352
353 release_content = self.run(
354 'cat %s/etc/lsb-release' % usb_mount_path,
355 ignore_status=True).stdout.strip()
356
357 if not re.search(r'RELEASE_TRACK=.*test', release_content):
358 logging.info('The image on usbkey is not a test image')
359 return ''
360
361 return lsbrelease_utils.get_chromeos_release_builder_path(
362 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700363 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700364 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700365 self.run(unmount_cmd, ignore_status=True)
366
367
Garry Wang70e5d062020-04-03 18:01:05 -0700368 def _probe_and_validate_usb_dev(self):
369 """This method probe the usb dev path by talking to servo, and then
370 validate the dev path is valid block device to servohost.
371 Possible output:
372 1. Encounter error during probe usb dev, returns empty string.
373 2. probe usb dev completed without error but cannot find usb dev,
374 raise AutoservRepairError.
375 3. probe usb dev find a usb dev path, but failed validation in this
376 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700377
Garry Wang70e5d062020-04-03 18:01:05 -0700378 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
379 if unexpected error occurred during probe.
380 @raises: AutoservRepairError if servo couldn't probe the usb dev path
381 (servo.probe_host_usb_dev() returns empty string), or the dev path is
382 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700383 """
384 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700385 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700386 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700387 except Exception as e:
388 # We don't want any unexpected or transient servo communicating
389 # failure block usb repair, so capture all errors here.
390 logging.error(e, exc_info=True)
391 logging.error('Unexpected error occurred on get usbkey dev path,'
392 ' skipping usbkey validation.')
393 return ''
394
Garry Wang70e5d062020-04-03 18:01:05 -0700395 if usb_dev:
396 # probe_host_usb_dev() sometimes return stale record,
397 # so we need to make sure the path exists in fdisk.
398 validate_cmd = 'fdisk -l | grep %s' % usb_dev
Garry Wang11441182020-06-16 18:34:14 -0700399 try:
400 resp = self.run(validate_cmd, ignore_status=True, timeout=60)
401 if resp.exit_status == 0:
402 return usb_dev
Garry Wang70e5d062020-04-03 18:01:05 -0700403
Garry Wang11441182020-06-16 18:34:14 -0700404 logging.error('%s is reported from "image_usbkey_dev" control'
405 ' but not detected by fdisk!', usb_dev)
406 except error.AutoservRunError as e:
407 if 'Timeout encountered' in str(e):
408 logging.warning('Timeout encountered during fdisk run,'
409 ' skipping usbkey validation.')
410 return ''
411 raise
Garry Wang70e5d062020-04-03 18:01:05 -0700412
413 raise hosts.AutoservRepairError(
414 'No usbkey detected on servo, the usbkey may be either missing'
415 ' or broken. Please replace usbkey on the servo and retry.',
416 'missing usbkey')
417
418
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700419 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700420 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700421 if self.servo_board:
422 try:
423 frm_config = config.Config(self.servo_board, self.servo_model)
424 return frm_config.chrome_ec
425 except Exception as e:
426 logging.error('Unexpected error when read from firmware'
427 ' configs; %s', str(e))
428 return False
429
Garry Wang70e5d062020-04-03 18:01:05 -0700430 def validate_image_usbkey(self):
431 """This method first validate if there is a recover usbkey on servo
432 that accessible to servohost, and second check if a ChromeOS image is
433 already on the usb drive and return the image_name so we can avoid
434 unnecessary download and flash to the recover usbkey on servo.
435
436 Please note that, there is special error handling logic here:
437 1. If unexpected error happens, we return empty string. So repair
438 actions will not get blocked.
439 2. If no working usbkey present on servo, but no errors, we'll raise
440 AutoservRepairError here.
441
442 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
443 or empty string if no test image detected, or unexpected
444 error occurred.
445 @raises: AutoservRepairError if the usbkey is not detected on servo.
446 """
447 usb_dev = self._probe_and_validate_usb_dev()
448 if usb_dev:
449 return self.get_image_name_from_usbkey(usb_dev)
450 else:
451 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700452
453
Richard Barnette1edbb162016-11-01 11:47:50 -0700454 def repair(self, silent=False):
455 """Attempt to repair servo host.
456
457 @param silent If true, suppress logging in `status.log`.
458 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700459 message = 'Beginning repair for servo host %s port %s serial %s'
460 message %= (self.hostname, self.servo_port, self.servo_serial)
461 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700462 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700463 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700464 self._servo_state = servo_constants.SERVO_STATE_WORKING
465 self.record('INFO', None, None,
466 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700467 # If target is a labstation then try to withdraw any existing
468 # reboot request created by this servo because it passed repair.
469 if self.is_labstation():
470 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700471 except Exception as e:
Otabek Kasimov120b6fa2020-07-03 00:15:27 -0700472 if not self.is_localhost():
Otabek Kasimovc6f30412020-06-30 20:08:12 -0700473 self._servo_state = self.determine_servo_state()
474 self.record('INFO', None, None,
475 'ServoHost repair set servo_state as %s'
476 % self._servo_state)
Garry Wang63b8c382020-03-11 22:28:40 -0700477 if self._is_critical_error(e):
478 self.disconnect_servo()
479 self.stop_servod()
480 raise
481
482
483 def _is_critical_error(self, error):
484 if (isinstance(error, hosts.AutoservVerifyDependencyError)
485 and not error.is_critical()):
486 logging.warning('Non-critical verify failure(s) detected during'
487 ' verify/repair servo, servo connection will'
488 ' still up but may not fully functional.'
489 ' Some repair actions and servo depended'
490 ' tests may not run.')
491 return False
492 logging.info('Critical verify failure(s) detected during repair/verify'
493 ' servo. Disconnecting servo and stop servod, all repair '
494 'action and tests that depends on servo will not run.')
495 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700496
497
Dan Shi4d478522014-02-14 13:46:32 -0800498 def get_servo(self):
499 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700500
Dan Shi4d478522014-02-14 13:46:32 -0800501 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700502 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700503 """
Dan Shi4d478522014-02-14 13:46:32 -0800504 return self._servo
505
506
Garry Wang79e9af62019-06-12 15:19:19 -0700507 def request_reboot(self):
508 """Request servohost to be rebooted when it's safe to by touch a file.
509 """
510 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700511 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700512 self.run('touch %s' % self._reboot_file, ignore_status=True)
513
514
Garry Wang464ff1e2019-07-18 17:20:34 -0700515 def withdraw_reboot_request(self):
516 """Withdraw a servohost reboot request if exists by remove the flag
517 file.
518 """
519 logging.debug('Withdrawing request to reboot servohost %s that created'
520 ' by servo with port # %s if exists.',
521 self.hostname, self.servo_port)
522 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
523
524
Garry Wangc1288cf2019-12-17 14:58:00 -0800525 def start_servod(self, quick_startup=False):
526 """Start the servod process on servohost.
527 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800528 # Skip if running on the localhost.(crbug.com/1038168)
529 if self.is_localhost():
530 logging.debug("Servohost is a localhost, skipping start servod.")
531 return
532
533 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800534 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800535 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800536 if self.servo_model:
537 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800538 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800539 logging.warning('Board for DUT is unknown; starting servod'
540 ' assuming a pre-configured board.')
541
542 cmd += ' PORT=%d' % self.servo_port
543 if self.servo_serial:
544 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800545
546 # Start servod with dual_v4 if the DUT/servo from designated pools.
547 dut_host_info = self.get_dut_host_info()
548 if dut_host_info:
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700549 # DUAL_V4: servo setup includes servo_micro and ccd_cr50
550 # connection to the DUT
551 is_dual_setup = False
Garry Wang11b5e872020-03-11 15:14:08 -0700552 if bool(dut_host_info.pools &
553 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800554 logging.debug('The DUT is detected in following designated'
555 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700556 servo_constants.POOLS_SUPPORT_DUAL_V4)
Otabek Kasimovf10a7052020-08-14 03:09:23 -0700557 is_dual_setup = True
558 elif dut_host_info.attributes.get('servo_setup') == 'DUAL_V4':
559 logging.debug('The DUT servo setup specified in config as '
560 ' "DUAL_V4"')
561 is_dual_setup = True
562 if is_dual_setup:
Garry Wangd7367482020-02-27 13:52:40 -0800563 cmd += ' DUAL_V4=1'
564
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800565 # Remove the symbolic links from the logs. This helps ensure that
566 # a failed servod instantiation does not cause us to grab old logs
567 # by mistake.
568 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800569 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800570
571 # There's a lag between when `start servod` completes and when
572 # the _ServodConnectionVerifier trigger can actually succeed.
573 # The call to time.sleep() below gives time to make sure that
574 # the trigger won't fail after we return.
575
576 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
577 # But in the rare case all servo on a labstation are in heavy use they
578 # may take ~30 seconds. So the timeout value will double these value,
579 # and we'll try quick start up when first time initialize servohost,
580 # and use standard start up timeout in repair.
581 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700582 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800583 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700584 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800585 logging.debug('Wait %s seconds for servod process fully up.', timeout)
586 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700587 # Cache the initial instance timestamp to check against servod restarts
588 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800589
590
591 def stop_servod(self):
592 """Stop the servod process on servohost.
593 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800594 # Skip if running on the localhost.(crbug.com/1038168)
595 if self.is_localhost():
596 logging.debug("Servohost is a localhost, skipping stop servod.")
597 return
598
Garry Wangc1288cf2019-12-17 14:58:00 -0800599 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800600 self.run('stop servod PORT=%d' % self.servo_port,
601 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800602 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700603 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
604 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800605
606
607 def restart_servod(self, quick_startup=False):
608 """Restart the servod process on servohost.
609 """
610 self.stop_servod()
611 self.start_servod(quick_startup)
612
Garry Wang11b5e872020-03-11 15:14:08 -0700613
Garry Wangffbd2162020-04-17 16:13:48 -0700614 def _process_servodtool_error(self, response):
615 """Helper function to handle non-zero servodtool response.
616 """
617 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700618 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700619 logging.error('The servo is not plugged on a usb hub that supports'
620 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700621 # change the flag so we can update this label in later process.
622 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700623 return
624
625 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
626 self.servo_serial, response.stdout):
627 logging.error('No servo with serial %s found!', self.servo_serial)
628 return
629
630 logging.error('Unexpected error occurred from usbhub control, please'
631 ' file a bug and inform chrome-fleet-software@ team!')
632
633
634 def _get_servo_usb_devnum(self):
635 """Helper function to collect current usb devnum of servo.
636 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700637 # TODO remove try-except when fix crbug.com/1087964
638 try:
639 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
640 resp = self.run(cmd, ignore_status=True, timeout=30)
641 except Exception as e:
642 # Here we catch only timeout errors.
643 # Other errors is filtered by ignore_status=True
644 logging.debug('Attempt to get servo usb-path failed due to '
645 'timeout; %s', e)
646 return ''
647
Garry Wangffbd2162020-04-17 16:13:48 -0700648 if resp.exit_status != 0:
649 self._process_servodtool_error(resp)
650 return ''
651 usb_path = resp.stdout.strip()
652 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
653
654 resp = self.run('cat %s/devnum' % usb_path,
655 ignore_status=True)
656 if resp.exit_status != 0:
657 self._process_servodtool_error(resp)
658 return ''
659 return resp.stdout.strip()
660
Garry Wang358aad42020-08-02 20:56:04 -0700661 def reboot_servo_v3_on_need(self):
662 """Check and reboot servo_v3 based on below conditions.
663 1. If there is an update pending on reboot.
664 2. Servo_v3 has been up for more than 96 hours.
665 """
666 if self.get_board() != 'beaglebone_servo':
667 logging.info('Servo reboot is only applicable for servo V3.')
Otabek Kasimove6df8102020-07-21 20:15:25 -0700668 return
669
Garry Wang358aad42020-08-02 20:56:04 -0700670 update_pending_reboot = (self._check_update_status() ==
671 self.UPDATE_STATE.PENDING_REBOOT)
672 uptime_hours = float(self.check_uptime())/3600
673 logging.info('Uptime of servo_v3: %s hour(s)', uptime_hours)
674 long_up_time = uptime_hours > 96
675
676 # Skip reboot if neither condition are met.
677 if not (update_pending_reboot or long_up_time):
Otabek Kasimove6df8102020-07-21 20:15:25 -0700678 return
679
Garry Wang358aad42020-08-02 20:56:04 -0700680 if update_pending_reboot:
681 message = 'Starting reboot servo_v3 because an update is pending.'
682 reboot_method = self._post_update_reboot
683 elif long_up_time:
684 message = 'Starting reboot servo_v3 because uptime > 96 hours.'
685 reboot_method = self._servo_host_reboot
686 self.record('INFO', None, None, message)
687 logging.info(message)
Otabek Kasimove6df8102020-07-21 20:15:25 -0700688 try:
Garry Wang358aad42020-08-02 20:56:04 -0700689 reboot_method()
Otabek Kasimove6df8102020-07-21 20:15:25 -0700690 message = 'Servo_v3 reboot completed successfully.'
691 except Exception as e:
692 logging.debug("Fail to reboot servo_v3; %s", e)
693 message = ('Servo_v3 reboot failed, please check debug log '
694 'for details.')
695 logging.info(message)
696 self.record('INFO', None, None, message)
Garry Wangffbd2162020-04-17 16:13:48 -0700697
698 def _reset_servo(self):
699 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700700 # TODO remove try-except when fix crbug.com/1087964
701 try:
702 resp = self.run('servodtool device -s %s power-cycle' %
703 self.servo_serial, ignore_status=True,
704 timeout=30)
705 if resp.exit_status != 0:
706 self._process_servodtool_error(resp)
707 return False
708 except Exception as e:
709 # Here we catch only timeout errors.
710 # Other errors is filtered by ignore_status=True
711 logging.debug('Attempt to reset servo failed due to timeout;'
712 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700713 return False
714
715 logging.debug('Wait %s seconds for servo to come back from reset.',
716 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
717 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700718 # change the flag so we can update this label in later process.
719 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700720 return True
721
722
723 def reset_servo(self):
724 """Reset(power-cycle) the servo via smart usbhub.
725 """
726 if not self.is_labstation():
727 logging.info('Servo reset is not applicable to servo_v3.')
728 return
729
730 pre_reset_devnum = self._get_servo_usb_devnum()
731 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
732 result = self._reset_servo()
733 if not result:
Garry Wangfd5c8b62020-06-08 15:36:54 -0700734 message = ('Failed to reset servo with serial: %s. (Please ignore'
735 ' this error if the DUT is not connected to a smart'
736 ' usbhub).' % self.servo_serial)
Garry Wangffbd2162020-04-17 16:13:48 -0700737 logging.warning(message)
738 self.record('INFO', None, None, message)
739 return
740
741 post_reset_devnum = self._get_servo_usb_devnum()
742 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
743 if not (pre_reset_devnum and post_reset_devnum):
744 message = ('Servo reset completed but unable to verify'
745 ' devnum change!')
746 elif pre_reset_devnum != post_reset_devnum:
747 message = ('Reset servo with serial %s completed successfully!'
748 % self.servo_serial)
749 else:
750 message = 'Servo reset completed but devnum is still not changed!'
751 logging.info(message)
752 self.record('INFO', None, None, message)
753
754
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800755 def _extract_compressed_logs(self, logdir, relevant_files):
756 """Decompress servod logs in |logdir|.
757
758 @param logdir: directory containing compressed servod logs.
759 @param relevant_files: list of files in |logdir| to consider.
760
761 @returns: tuple, (tarfiles, files) where
762 tarfiles: list of the compressed filenames that have been
763 extracted and deleted
764 files: list of the uncompressed files that were generated
765 """
766 # For all tar-files, first extract them to the directory, and
767 # then let the common flow handle them.
768 tarfiles = [cf for cf in relevant_files if
769 cf.endswith(self.COMPRESSION_SUFFIX)]
770 files = []
771 for f in tarfiles:
772 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
773 with tarfile.open(f) as tf:
774 # Each tarfile has only one member, as
775 # that's the compressed log.
776 member = tf.members[0]
777 # Manipulate so that it only extracts the basename, and not
778 # the directories etc.
779 member.name = norm_name
780 files.append(os.path.join(logdir, member.name))
781 tf.extract(member, logdir)
782 # File has been extracted: remove the compressed file.
783 os.remove(f)
784 return tarfiles, files
785
786 def _extract_mcu_logs(self, log_subdir):
787 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
788
789 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
790 lines from the logs to generate invidiual console logs e.g. after
791 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
792 those MCUs had any console input/output.
793
794 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
795 """
796 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
797 # files
798 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
799 if not os.path.exists(mcu_lines_file):
800 logging.info('No DEBUG logs found to extract MCU logs from.')
801 return
802 mcu_files = {}
803 mcu_file_template = '%s.txt'
804 with open(mcu_lines_file, 'r') as f:
805 for line in f:
806 match = self.MCU_EXTRACTOR.match(line)
807 if match:
808 mcu = match.group(self.MCU_GROUP).lower()
809 line = match.group(self.LINE_GROUP)
810 if mcu not in mcu_files:
811 mcu_file = os.path.join(log_subdir,
812 mcu_file_template % mcu)
813 mcu_files[mcu] = open(mcu_file, 'a')
814 fd = mcu_files[mcu]
815 fd.write(line + '\n')
816 for f in mcu_files:
817 mcu_files[f].close()
818
819
820 def remove_latest_log_symlinks(self):
821 """Remove the conveninence symlinks 'latest' servod logs."""
822 symlink_wildcard = '%s/latest*' % self.remote_log_dir
823 cmd = 'rm ' + symlink_wildcard
824 self.run(cmd, stderr_tee=None, ignore_status=True)
825
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700826 def probe_servod_restart(self, instance_ts, outdir):
827 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800828
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700829 If since the last time this host called start_servod() servod crashed
830 and restarted, this helper finds those logs as well, and stores them
831 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000832
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700833 It also issues a panicinfo command to servo devices after the restart
834 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800835
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700836 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800837 @param outdir: directory to create a subdirectory into to place the
838 servod logs into.
839 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700840 if self._initial_instance_ts is None:
841 logging.info('No log timestamp grabbed successfully on servod '
842 'startup. Cannot check device restarts. Ignoring.')
843 return
844 if instance_ts == self._initial_instance_ts:
845 logging.debug('Servod appears to have run without restarting')
846 return
847 # Servod seems to have restarted (at least once). |_initial_instance_ts|
848 # is the first timestamp, and instance_ts is the current timestamp. Find
849 # all timestamps in between them, and grab the logs for each.
850 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
851 instance_ts)
852 logging.info('Servod has restarted %d times between the start and the '
853 'end of this servo_host.', len(tss))
854 logging.info('This might be an issue. Will extract all logs from each '
855 'instance.')
856 logging.info('Logs that are not the currently running (about to turn '
857 'down) instance are maked with a .%s in their folder.',
858 self.OLD_LOG_SUFFIX)
859 for ts in tss:
860 self.get_instance_logs(ts, outdir, old=True)
861 # Lastly, servod has restarted due to a potential issue. Try to get
862 # panic information from servo micro and servo v4 for the current logs.
863 # This can only happen if the |_servo| attribute is initialized.
864 if self._servo:
865 for mcu in ['servo_micro', 'servo_v4']:
866 ctrl = '%s_uart_cmd' % mcu
867 if self._servo.has_control(ctrl):
868 logging.info('Trying to retrieve %r panicinfo into logs',
869 mcu)
870 try:
871 self._servo.set_nocheck(ctrl, 'panicinfo')
872 except error.TestFail as e:
873 logging.error('Failed to generate panicinfo for %r '
874 'logs. %s', mcu, str(e))
875
876 def _find_instance_timestamps_between(self, start_ts, end_ts):
877 """Find all log timestamps between [start_ts, end_ts).
878
879 @param start_ts: str, earliest log timestamp of interest
880 @param end_ts: str, latest log timestamp of interest
881
882 @returns: list, all timestamps between start_ts and end_ts, end_ts
883 exclusive, on the servo_host. An empty list on errors
884 """
885 # Simply get all timestamp, and then sort and remove
886 cmd = 'ls %s' % self.remote_log_dir
887 res = self.run(cmd, stderr_tee=None, ignore_status=True)
888 if res.exit_status != 0:
889 # Here we failed to find anything.
890 logging.info('Failed to find remote servod logs. Ignoring.')
891 return []
892 logfiles = res.stdout.strip().split()
893 timestamps = set()
894 for logfile in logfiles:
895 ts_match = self.TS_EXTRACTOR.match(logfile)
896 if not ts_match:
897 # Simply ignore files that fail the check. It might be the
898 # 'latest' symlinks or random files.
899 continue
900 timestamps.add(ts_match.group(self.TS_GROUP))
901 # At this point we have all unique timestamps.
902 timestamps = sorted(timestamps)
903 for ts in [start_ts, end_ts]:
904 if ts not in timestamps:
905 logging.error('Timestamp %r not in servod logs. Cannot query '
906 'for timestamps in between %r and %r', ts,
907 start_ts, end_ts)
908 return []
909 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
910
911 def get_instance_logs_ts(self):
912 """Retrieve the currently running servod instance's log timestamp
913
914 @returns: str, timestamp for current instance, or None on failure
915 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800916 # First, extract the timestamp. This cmd gives the real filename of
917 # the latest aka current log file.
918 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
919 'then realpath %(dir)s/latest.DEBUG;'
920 'elif [ -f %(dir)s/latest ];'
921 'then realpath %(dir)s/latest;'
922 'else exit %(code)d;'
923 'fi' % {'dir': self.remote_log_dir,
924 'code': self.NO_SYMLINKS_CODE})
925 res = self.run(cmd, stderr_tee=None, ignore_status=True)
926 if res.exit_status != 0:
927 if res.exit_status == self.NO_SYMLINKS_CODE:
928 logging.warning('servod log latest symlinks not found. '
929 'This is likely due to an error starting up '
930 'servod. Ignoring..')
931 else:
932 logging.warning('Failed to find servod logs on servo host.')
933 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700934 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800935 fname = os.path.basename(res.stdout.strip())
936 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800937 ts_match = self.TS_EXTRACTOR.match(fname)
938 if not ts_match:
939 logging.warning('Failed to extract timestamp from servod log file '
940 '%r. Skipping. The servo host is using outdated '
941 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700942 return None
943 return ts_match.group(self.TS_GROUP)
944
945 def get_instance_logs(self, instance_ts, outdir, old=False):
946 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
947
948 This method first collects all logs on the servo_host side pertaining
949 to this servod instance (port, instatiation). It glues them together
950 into combined log.[level].txt files and extracts all available MCU
951 console I/O from the logs into individual files e.g. servo_v4.txt
952
953 All the output can be found in a directory inside |outdir| that
954 this generates based on |LOG_DIR|, the servod port, and the instance
955 timestamp on the servo_host side.
956
957 @param instance_ts: log timestamp to grab logfiles for
958 @param outdir: directory to create a subdirectory into to place the
959 servod logs into.
960 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
961 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800962 # Create the local results log dir.
963 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
964 str(self.servo_port),
965 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700966 if old:
967 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
968 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800969 os.mkdir(log_dir)
970 # Now, get all files with that timestamp.
971 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
972 instance_ts)
973 res = self.run(cmd, stderr_tee=None, ignore_status=True)
974 files = res.stdout.strip().split()
975 try:
976 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700977 if not os.listdir(log_dir):
978 logging.info('No servod logs retrieved. Ignoring, and removing '
979 '%r again.', log_dir)
980 os.rmdir(log_dir)
981 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800982 except error.AutoservRunError as e:
983 result = e.result_obj
984 if result.exit_status != 0:
985 stderr = result.stderr.strip()
986 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
987 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700988 # Remove the log_dir as nothing was added to it.
989 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800990 return
991 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
992 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
993 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
994 # Create the joint files for each loglevel. i.e log.DEBUG
995 joint_file = self.JOINT_LOG_PREFIX
996 if level_name:
997 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
998 # This helps with some online tools to avoid complaints about an
999 # unknown filetype.
1000 joint_file = joint_file + '.txt'
1001 joint_path = os.path.join(log_dir, joint_file)
1002 files = [f for f in local_files if level_name in f]
1003 if not files:
1004 # TODO(crrev.com/c/1793030): remove no-level case once CL
1005 # is pushed
1006 continue
1007 # Extract compressed logs if any.
1008 compressed, extracted = self._extract_compressed_logs(log_dir,
1009 files)
1010 files = list(set(files) - set(compressed))
1011 files.extend(extracted)
1012 # Need to sort. As they all share the same timestamp, and
1013 # loglevel, the index itself is sufficient. The highest index
1014 # is the oldest file, therefore we need a descending sort.
1015 def sortkey(f, level=level_name):
1016 """Custom sortkey to sort based on rotation number int."""
1017 if f.endswith(level_name): return 0
1018 return int(f.split('.')[-1])
1019
1020 files.sort(reverse=True, key=sortkey)
1021 # Just rename the first file rather than building from scratch.
1022 os.rename(files[0], joint_path)
1023 with open(joint_path, 'a') as joint_f:
1024 for logfile in files[1:]:
1025 # Transfer the file to the joint file line by line.
1026 with open(logfile, 'r') as log_f:
1027 for line in log_f:
1028 joint_f.write(line)
1029 # File has been written over. Delete safely.
1030 os.remove(logfile)
1031 # Need to remove all files form |local_files| so we don't
1032 # analyze them again.
1033 local_files = list(set(local_files) - set(files) - set(compressed))
1034 # Lastly, extract MCU logs from the joint logs.
1035 self._extract_mcu_logs(log_dir)
1036
Garry Wang79e9af62019-06-12 15:19:19 -07001037 def _lock(self):
1038 """lock servohost by touching a file.
1039 """
1040 logging.debug('Locking servohost %s by touching %s file',
1041 self.hostname, self._lock_file)
1042 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001043 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -07001044
1045
1046 def _unlock(self):
1047 """Unlock servohost by removing the lock file.
1048 """
1049 logging.debug('Unlocking servohost by removing %s file',
1050 self._lock_file)
1051 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -07001052 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -07001053
1054
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001055 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -07001056 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001057 # NOTE: throughout this method there are multiple attempts to catch
1058 # all errors. This is WAI as log grabbing should not fail tests.
1059 # However, the goal is to catch and handle/process all errors, thus
1060 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001061 if self._closed:
1062 logging.debug('ServoHost is already closed.')
1063 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001064 instance_ts = self.get_instance_logs_ts()
1065 # TODO(crbug.com/1011516): once enabled, remove the check against
1066 # localhost and instead check against log-rotiation enablement.
1067 logs_available = (instance_ts is not None and
1068 self.job and
1069 not self.is_localhost())
1070 if logs_available:
1071 # Probe whether there was a servod restart, and grab those old
1072 # logs as well.
1073 try:
1074 self.probe_servod_restart(instance_ts, self.job.resultdir)
1075 except (error.AutoservRunError, error.TestFail) as e:
1076 logging.info('Failed to grab servo logs due to: %s. '
1077 'This error is forgiven.', str(e))
1078 except Exception as e:
1079 logging.error('Unexpected error probing for old logs. %s. '
1080 'Forgiven. Please file a bug and fix or catch '
1081 'in log probing function', str(e),
1082 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001083 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001084 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -07001085 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001086 self._servo.close(outdir)
1087
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001088 if logs_available:
1089 # Grab current (not old like above) logs after the servo instance
1090 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001091 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001092 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001093 except error.AutoservRunError as e:
1094 logging.info('Failed to grab servo logs due to: %s. '
1095 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -07001096 except Exception as e:
1097 logging.error('Unexpected error grabbing servod logs. %s. '
1098 'Forgiven. Please file a bug and fix or catch '
1099 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001100
Garry Wang7c00b0f2019-06-25 17:28:17 -07001101 if self._is_locked:
1102 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -07001103 try:
1104 self._unlock()
1105 except error.AutoservSSHTimeout:
1106 logging.error('Unlock servohost failed due to ssh timeout.'
1107 ' It may caused by servohost went down during'
1108 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001109 # We want always stop servod after task to minimum the impact of bad
1110 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001111 try:
1112 self.stop_servod()
1113 except error.AutoservRunError as e:
1114 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001115 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001116
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001117 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001118 # Mark closed.
1119 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001120
1121
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001122 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001123 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001124
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001125 def _get_host_metrics_data(self):
1126 return {'port': self.servo_port,
Otabek Kasimov0ea47362020-07-11 20:55:09 -07001127 'host': self.get_dut_hostname() or self.hostname,
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001128 'board': self.servo_board or ''}
1129
1130 def _is_servo_device_connected(self, servo_type, serial):
1131 """Check if device is connected to the labstation.
1132
1133 Works for all servo devices connected to the labstation.
1134 For servo_v3 please use 'self._is_servo_board_present_on_servo_v3'
1135
1136 @param servo_type: The type of servo device. Expecting value can be
1137 servo_v4 or servo_micro.
1138 @param serial: The serial number of the device to detect it.
1139 """
1140 vid_pid = self.SERVO_VID_PID.get(servo_type)
1141 if not vid_pid or not serial:
1142 # device cannot detected without VID/PID or serial number
1143 return False
1144 logging.debug('Started to detect %s', servo_type)
1145 try:
1146 cmd = 'lsusb -v -d %s |grep iSerial |grep %s' % (vid_pid, serial)
1147 result = self.run(cmd, ignore_status=True, timeout=30)
1148 if result.exit_status == 0 and result.stdout.strip():
1149 logging.debug('The %s is plugged in to the host.', servo_type)
1150 return True
1151 logging.debug('%s device is not detected; %s', servo_type, result)
1152 return False
1153 except Exception as e:
1154 # can be triggered by timeout issue due running the script
1155 metrics.Counter(
1156 'chromeos/autotest/repair/servo_detection/timeout'
1157 ).increment(fields=self._get_host_metrics_data())
1158 logging.error('%s device is not detected; %s', servo_type, str(e))
1159 return None
1160
1161 def _is_servo_board_present_on_servo_v3(self):
1162 """Check if servo board is detected on servo_v3"""
1163 vid_pids = self.SERVO_VID_PID['servo_v3']
1164 if not vid_pids or len(vid_pids) == 0:
1165 # device cannot detected without VID/PID
1166 return False
1167 logging.debug('Started to detect servo board on servo_v3')
1168 not_detected = 'The servo board is not detected on servo_v3'
1169 try:
1170 cmd = 'lsusb | grep "%s"' % "\|".join(vid_pids)
1171 result = self.run(cmd, ignore_status=True, timeout=30)
1172 if result.exit_status == 0 and result.stdout.strip():
1173 logging.debug('The servo board is detected on servo_v3')
1174 return True
1175 logging.debug('%s; %s', not_detected, result)
1176 return False
1177 except Exception as e:
1178 # can be triggered by timeout issue due running the script
1179 metrics.Counter(
1180 'chromeos/autotest/repair/servo_detection/timeout'
1181 ).increment(fields=self._get_host_metrics_data())
1182 logging.error('%s; %s', not_detected, str(e))
1183 return None
1184
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001185 def _is_main_device_not_detected_on_servo_v4(self):
1186 """Check if servod cannot find main device on servo.
1187
1188 The check based on reading servod logs for servo_v4.
1189 """
1190 if not self._initial_instance_ts:
1191 # latest log not found
1192 return False
1193 logging.debug('latest log for servod created at %s',
1194 self._initial_instance_ts)
1195 try:
1196 log_created = calendar.timegm(time.strptime(
1197 self._initial_instance_ts,
1198 "%Y-%m-%d--%H-%M-%S.%f"))
1199 except ValueError as e:
1200 logging.debug('Cannot read time from log file name: %s',
1201 self._initial_instance_ts)
1202 return False
1203 min_time_created = calendar.timegm(time.gmtime())
1204 if min_time_created > log_created + 3600:
1205 # the log file is old we cannot use it
1206 logging.debug('log file was created more than hour ago, too old')
1207 return False
1208 logging.debug('latest log was created not longer then 1 hour ago')
1209
1210 # check if servod can detect main device by servo_v4
1211 message = 'ERROR - No servo micro or CCD detected for board'
1212 cmd = ('cat /var/log/servod_%s/log.%s.INFO |grep "%s"'
1213 % (self.servo_port, self._initial_instance_ts, message))
1214 result = self.run(cmd, ignore_status=True)
1215 if result.stdout.strip():
1216 logging.info('Servod cannot detect main device on the servo; '
1217 'Can be caused by bad hardware of servo or '
1218 'issue on the DUT side.')
1219 return True
1220 logging.debug('The main device is detected')
1221 return False
1222
Otabek Kasimov15963492020-06-23 21:10:51 -07001223 def get_verify_state(self, tag):
1224 """Return the state of servo verifier.
1225
1226 @returns: bool or None
1227 """
1228 return self._repair_strategy.verifier_is_good(tag)
1229
1230 def determine_servo_state(self):
1231 """Determine servo state based on the failed verifier.
1232
1233 @returns: servo state value
1234 The state detecting based on first fail verifier or collecting of
1235 them.
1236 """
1237 ssh = self.get_verify_state('servo_ssh')
1238 disk_space = self.get_verify_state('disk_space')
1239 start_servod = self.get_verify_state('servod_job')
1240 create_servo = self.get_verify_state('servod_connection')
1241 init_servo = self.get_verify_state('servod_control')
1242 pwr_button = self.get_verify_state('pwr_button')
1243 lid_open = self.get_verify_state('lid_open')
1244 ec_board = self.get_verify_state('ec_board')
1245 ccd_testlab = self.get_verify_state('ccd_testlab')
1246
1247 if not ssh:
1248 return servo_constants.SERVO_STATE_NO_SSH
1249
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001250 if (start_servod == self.VERIFY_FAILED
1251 or create_servo == self.VERIFY_FAILED):
1252 # sometimes servo can start with out present servo
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001253 if self.is_labstation():
1254 if not self.servo_serial:
1255 return servo_constants.SERVO_STATE_WRONG_CONFIG
1256 if self._is_servo_device_connected(
1257 'servo_v4',
1258 self.servo_serial) == False:
1259 return servo_constants.SERVO_STATE_NOT_CONNECTED
1260 elif self._is_servo_board_present_on_servo_v3() == False:
1261 return servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimov066bdb82020-08-12 15:57:44 -07001262
1263 if start_servod == self.VERIFY_FAILED:
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001264 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1265
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001266 if create_servo == self.VERIFY_FAILED:
1267 if (self.is_labstation()
1268 and self._is_main_device_not_detected_on_servo_v4()):
1269 servo_type = None
1270 if self.get_dut_host_info():
1271 servo_type = self.get_dut_host_info().get_label_value(
1272 servo_constants.SERVO_TYPE_LABEL_PREFIX)
1273 if servo_type and 'servo_micro' in servo_type:
1274 serial = self.get_servo_micro_serial_number()
1275 logging.debug('servo_micro serial: %s', serial)
1276 if self._is_servo_device_detected('servo_micro',
1277 serial):
1278 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1279 # Device can be not detected because of DUT
1280 # TODO (otabek) update after b/159755652 and b/159754985
1281 metrics.Counter(
1282 'chromeos/autotest/repair/servo_state/needs_replacement'
1283 ).increment(fields=self._get_host_metrics_data())
1284 elif not self.is_labstation():
1285 # Here need logic to check if flex cable is connected
1286 pass
1287
Otabek Kasimov15963492020-06-23 21:10:51 -07001288 # one of the reason why servo can not initialized
1289 if ccd_testlab == self.VERIFY_FAILED:
1290 return servo_constants.SERVO_STATE_CCD_TESTLAB_ISSUE
1291
Otabek Kasimov120b6fa2020-07-03 00:15:27 -07001292 if (create_servo == self.VERIFY_FAILED
1293 or init_servo == self.VERIFY_FAILED):
Otabek Kasimov15963492020-06-23 21:10:51 -07001294 return servo_constants.SERVO_STATE_SERVOD_ISSUE
1295
1296 if pwr_button == self.VERIFY_FAILED:
1297 return servo_constants.SERVO_STATE_BAD_RIBBON_CABLE
1298 if lid_open == self.VERIFY_FAILED:
1299 return servo_constants.SERVO_STATE_LID_OPEN_FAILED
1300 if ec_board == self.VERIFY_FAILED:
1301 return servo_constants.SERVO_STATE_EC_BROKEN
1302
Otabek Kasimov15963492020-06-23 21:10:51 -07001303 metrics.Counter(
1304 'chromeos/autotest/repair/unknown_servo_state'
Otabek Kasimovc6f30412020-06-30 20:08:12 -07001305 ).increment(fields=self._get_host_metrics_data())
Otabek Kasimov15963492020-06-23 21:10:51 -07001306 logging.info('We do not have special state for this failure yet :)')
1307 return servo_constants.SERVO_STATE_BROKEN
1308
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001309
Richard Barnetteea3e4602016-06-10 12:36:41 -07001310def make_servo_hostname(dut_hostname):
1311 """Given a DUT's hostname, return the hostname of its servo.
1312
1313 @param dut_hostname: hostname of a DUT.
1314
1315 @return hostname of the DUT's servo.
1316
1317 """
1318 host_parts = dut_hostname.split('.')
1319 host_parts[0] = host_parts[0] + '-servo'
1320 return '.'.join(host_parts)
1321
1322
1323def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001324 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001325
1326 @param servo_hostname: hostname of the servo host.
1327
1328 @return True if it's up, False otherwise
1329 """
1330 # Technically, this duplicates the SSH ping done early in the servo
1331 # proxy initialization code. However, this ping ends in a couple
1332 # seconds when if fails, rather than the 60 seconds it takes to decide
1333 # that an SSH ping has timed out. Specifically, that timeout happens
1334 # when our servo DNS name resolves, but there is no host at that IP.
1335 logging.info('Pinging servo host at %s', servo_hostname)
1336 ping_config = ping_runner.PingConfig(
1337 servo_hostname, count=3,
1338 ignore_result=True, ignore_status=True)
1339 return ping_runner.PingRunner().ping(ping_config).received > 0
1340
1341
Richard Barnettee519dcd2016-08-15 17:37:17 -07001342def _map_afe_board_to_servo_board(afe_board):
1343 """Map a board we get from the AFE to a servo appropriate value.
1344
1345 Many boards are identical to other boards for servo's purposes.
1346 This function makes that mapping.
1347
1348 @param afe_board string board name received from AFE.
1349 @return board we expect servo to have.
1350
1351 """
1352 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1353 BOARD_MAP = {'gizmo': 'panther'}
1354 mapped_board = afe_board
1355 if afe_board in BOARD_MAP:
1356 mapped_board = BOARD_MAP[afe_board]
1357 else:
1358 for suffix in KNOWN_SUFFIXES:
1359 if afe_board.endswith(suffix):
1360 mapped_board = afe_board[0:-len(suffix)]
1361 break
1362 if mapped_board != afe_board:
1363 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1364 return mapped_board
1365
1366
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001367def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001368 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001369
Richard Barnetteea3e4602016-06-10 12:36:41 -07001370 @param dut_host Instance of `Host` on which to find the servo
1371 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001372 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001373 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001374 info = dut_host.host_info_store.get()
1375 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001376 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001377
Garry Wang11b5e872020-03-11 15:14:08 -07001378 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001379 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001380 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1381 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001382 except ValueError:
1383 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001384 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001385 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001386 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001387
1388 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001389 servo_board = _map_afe_board_to_servo_board(info.board)
1390 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001391 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001392 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1393 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001394
1395
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001396def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001397 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1398 in ['localhost', '127.0.0.1']):
1399 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001400 'SSP', 'host_container_ip', type=str, default=None)
1401
1402
Dan Shi023aae32016-05-25 11:13:01 -07001403def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001404 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001405 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001406
Richard Barnette9a26ad62016-06-10 12:03:08 -07001407 This function attempts to create and verify or repair a `ServoHost`
1408 object for a servo connected to the given `dut`, subject to various
1409 constraints imposed by the parameters:
1410 * When the `servo_args` parameter is not `None`, a servo
1411 host must be created, and must be checked with `repair()`.
1412 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1413 true:
1414 * If `try_servo_repair` is true, then create a servo host and
1415 check it with `repair()`.
1416 * Otherwise, if the servo responds to `ping` then create a
1417 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001418
Richard Barnette9a26ad62016-06-10 12:03:08 -07001419 In cases where `servo_args` was not `None`, repair failure
1420 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001421 are logged and then discarded. Note that this only happens in cases
1422 where we're called from a test (not special task) control file that
1423 has an explicit dependency on servo. In that case, we require that
1424 repair not write to `status.log`, so as to avoid polluting test
1425 results.
1426
1427 TODO(jrbarnette): The special handling for servo in test control
1428 files is a thorn in my flesh; I dearly hope to see it cut out before
1429 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001430
1431 Parameters for a servo host consist of a host name, port number, and
1432 DUT board, and are determined from one of these sources, in order of
1433 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001434 * Servo attributes from the `dut` parameter take precedence over
1435 all other sources of information.
1436 * If a DNS entry for the servo based on the DUT hostname exists in
1437 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001438 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001439 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001440 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001441
1442 @param dut An instance of `Host` from which to take
1443 servo parameters (if available).
1444 @param servo_args A dictionary with servo parameters to use if
1445 they can't be found from `dut`. If this
1446 argument is supplied, unrepaired exceptions
1447 from `verify()` will be passed back to the
1448 caller.
1449 @param try_lab_servo If not true, servo host creation will be
1450 skipped unless otherwise required by the
1451 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001452 @param try_servo_repair If true, check a servo host with
1453 `repair()` instead of `verify()`.
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001454 @param dut_host_info: A HostInfo object of the DUT that connected
1455 to this servo.
Dan Shi4d478522014-02-14 13:46:32 -08001456
1457 @returns: A ServoHost object or None. See comments above.
1458
1459 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001460 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001461 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001462 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001463 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001464 if utils.in_moblab_ssp():
1465 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001466 logging.debug(
1467 'Overriding provided servo_args (%s) with arguments'
1468 ' determined from the host (%s)',
1469 servo_args,
1470 servo_args_override,
1471 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001472 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001473
Richard Barnetteea3e4602016-06-10 12:36:41 -07001474 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001475 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001476 if try_lab_servo or servo_dependency:
Otabek Kasimov646812c2020-06-23 20:01:36 -07001477 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001478 else:
1479 # For regular test case which not required the servo
1480 return None, None
1481
Garry Wang11b5e872020-03-11 15:14:08 -07001482 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1483 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001484 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1485 logging.debug(
1486 'Servo connection info missed hostname: %s , port: %s',
1487 servo_hostname, servo_port)
Otabek Kasimov646812c2020-06-23 20:01:36 -07001488 return None, servo_constants.SERVO_STATE_MISSING_CONFIG
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001489 if not is_servo_host_information_valid(servo_hostname, servo_port):
1490 logging.debug(
1491 'Servo connection info is incorrect hostname: %s , port: %s',
1492 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001493 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001494 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001495 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001496 logging.debug('ServoHost is not up.')
Otabek Kasimov646812c2020-06-23 20:01:36 -07001497 return None, servo_constants.SERVO_STATE_NO_SSH
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001498
Garry Wangebc015b2019-06-06 17:45:06 -07001499 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001500
Otabek Kasimove6df8102020-07-21 20:15:25 -07001501 # Reset or reboot servo device only during AdminRepair tasks.
1502 if try_servo_repair:
1503 if newhost._is_locked:
1504 # Reset servo if the servo is locked, as we check if the servohost
1505 # is up, if the servohost is labstation and if the servohost is in
1506 # lab inside the locking logic.
1507 newhost.reset_servo()
1508 else:
Garry Wang358aad42020-08-02 20:56:04 -07001509 try:
1510 newhost.reboot_servo_v3_on_need()
Garry Wang1f0d5332020-08-10 19:32:32 -07001511 except Exception as e:
1512 logging.info('[Non-critical] Unexpected error while trying to'
1513 ' reboot servo_v3, skipping the reboot; %s', e)
Otabek Kasimove6df8102020-07-21 20:15:25 -07001514
Otabek Kasimov2b50cdb2020-07-06 19:16:06 -07001515 if dut:
1516 newhost.set_dut_hostname(dut.hostname)
Otabek Kasimov9e90ae12020-08-14 03:01:19 -07001517 if dut_host_info:
1518 newhost.set_dut_host_info(dut_host_info)
Garry Wangffbd2162020-04-17 16:13:48 -07001519
Otabek Kasimov8475cce2020-07-14 12:11:31 -07001520 if try_lab_servo or try_servo_repair:
1521 try:
1522 logging.info("Check and update servo firmware.")
1523 servo_updater.update_servo_firmware(
1524 newhost,
1525 force_update=False)
1526 except Exception as e:
1527 logging.error("Servo device update error: %s", e)
1528
Garry Wangcdd27b22020-01-13 14:59:11 -08001529 try:
1530 newhost.restart_servod(quick_startup=True)
1531 except error.AutoservSSHTimeout:
1532 logging.warning("Restart servod failed due ssh connection "
1533 "to servohost timed out. This error is forgiven"
1534 " here, we will retry in servo repair process.")
1535 except error.AutoservRunError as e:
1536 logging.warning("Restart servod failed due to:\n%s\n"
1537 "This error is forgiven here, we will retry"
1538 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001539
Richard Barnette9a26ad62016-06-10 12:03:08 -07001540 # Note that the logic of repair() includes everything done
1541 # by verify(). It's sufficient to call one or the other;
1542 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001543 if servo_dependency:
1544 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001545 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001546
1547 if try_servo_repair:
1548 try:
1549 newhost.repair()
1550 except Exception:
1551 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001552 else:
1553 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001554 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001555 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001556 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001557 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001558
1559
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001560def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001561 if hostname is None or len(hostname.strip()) == 0:
1562 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001563 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001564 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001565 if not type(port) is int:
1566 try:
1567 int(port)
1568 except ValueError:
1569 return False
1570
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001571 return True
1572
1573
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001574def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001575 """Check if provided servo attributes are valid.
1576
1577 @param hostname Hostname of the servohost.
1578 @param port servo port number.
1579
1580 @returns: A bool value to indicate if provided servo attribute valid.
1581 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001582 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001583 return False
1584 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001585 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001586 if port_int < 1 or port_int > 65000:
1587 return False
1588 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001589 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001590 return False
1591 return True