blob: c4385f0ae58dd9045a93268ca851b64657e680f0 [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Fang Deng5d518f42013-08-02 14:04:32 -070019
20from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070021from autotest_lib.client.common_lib import error
Richard Barnette9a26ad62016-06-10 12:03:08 -070022from autotest_lib.client.common_lib import hosts
Garry Wang7b0e1b72020-03-25 19:08:59 -070023from autotest_lib.client.common_lib import lsbrelease_utils
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070026from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000027from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070028from autotest_lib.server.hosts import base_servohost
Garry Wang11b5e872020-03-11 15:14:08 -070029from autotest_lib.server.hosts import servo_constants
Otabek Kasimov4ea636e2020-04-14 23:35:06 -070030from autotest_lib.server.cros.faft.utils import config
Garry Wang11b5e872020-03-11 15:14:08 -070031from autotest_lib.client.common_lib import global_config
Garry Wangd7367482020-02-27 13:52:40 -080032
Dan Shi3b2adf62015-09-02 17:46:54 -070033_CONFIG = global_config.global_config
Fang Deng5d518f42013-08-02 14:04:32 -070034
Otabek Kasimova7ba91a2020-03-09 08:31:01 -070035
Garry Wangebc015b2019-06-06 17:45:06 -070036class ServoHost(base_servohost.BaseServoHost):
37 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070038 that with a servo instance for a specific port.
39
40 @type _servo: servo.Servo | None
41 """
Fang Deng5d518f42013-08-02 14:04:32 -070042
Raul E Rangel52ca2e82018-07-03 14:10:14 -060043 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070044
Dan Shie5b3c512014-08-21 12:12:09 -070045 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070046 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070047
xixuan6cf6d2f2016-01-29 15:29:00 -080048 # Ready test function
49 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070050
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080051 # Directory prefix on the servo host where the servod logs are stored.
52 SERVOD_LOG_PREFIX = '/var/log/servod'
53
54 # Exit code to use when symlinks for servod logs are not found.
55 NO_SYMLINKS_CODE = 9
56
57 # Directory in the job's results directory to dump the logs into.
58 LOG_DIR = 'servod'
59
60 # Prefix for joint loglevel files in the logs.
61 JOINT_LOG_PREFIX = 'log'
62
63 # Regex group to extract timestamp from logfile name.
64 TS_GROUP = 'ts'
65
66 # This regex is used to extract the timestamp from servod logs.
67 # files always start with log.
68 TS_RE = (r'log.'
69 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
70 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
71 # The loglevel is optional depending on labstation version.
72 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
73 TS_EXTRACTOR = re.compile(TS_RE)
74
75 # Regex group to extract MCU name from logline in servod logs.
76 MCU_GROUP = 'mcu'
77
78 # Regex group to extract logline from MCU logline in servod logs.
79 LINE_GROUP = 'line'
80
81 # This regex is used to extract the mcu and the line content from an
82 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
83 # Here is an example log-line:
84 #
85 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
86 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
87 #
88 # Here is conceptually how they are formatted:
89 #
90 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
91 # <output>
92 #
93 # The log format starts with a timestamp
94 MCU_RE = (r'[\d\-]+ [\d:,]+ '
95 # The mcu that is logging this is next.
96 r'- (?P<%s>\w+) - '
97 # Next, we have more log outputs before the actual line.
98 # Information about the file line, logging function etc.
99 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
100 # NOTE: if the log format changes, this regex needs to be
101 # adjusted.
102 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
103 # Lastly, we get the MCU's console line.
104 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
105 MCU_EXTRACTOR = re.compile(MCU_RE)
106
107 # Suffix to identify compressed logfiles.
108 COMPRESSION_SUFFIX = '.tbz2'
109
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700110 # A suffix to mark servod log directories that came from instance that
111 # ran during this servo_host, but are not the last one running e.g. when
112 # an instance (on purpose, or due to a bug) restarted in the middle of the
113 # run.
114 OLD_LOG_SUFFIX = 'old'
115
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800116 def _init_attributes(self):
117 self._servo_state = None
118 self.servo_port = None
119 self.servo_board = None
120 self.servo_model = None
121 self.servo_serial = None
Garry Wang000c6c02020-05-11 21:27:23 -0700122 # The flag that indicate if a servo is connected to a smart usbhub.
123 # TODO(xianuowang@) remove this flag once all usbhubs in the lab
124 # get replaced.
125 self.smart_usbhub = None
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800126 self._servo = None
127 self._servod_server_proxy = None
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700128 self._initial_instance_ts = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800129 # Flag to make sure that multiple calls to close do not result in the
130 # logic executing multiple times.
131 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700132
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700133 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700134 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700135 servo_model=None, servo_serial=None, is_in_lab=None,
136 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700137 """Initialize a ServoHost instance.
138
139 A ServoHost instance represents a host that controls a servo.
140
141 @param servo_host: Name of the host where the servod process
142 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600143 @param servo_port: Port the servod process is listening on. Defaults
144 to the SERVOD_PORT environment variable if set,
145 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700146 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700147 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800148 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
149 to None, for which utils.host_is_in_lab_zone will be
150 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700151
152 """
153 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700154 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800155 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700156 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700157 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700158 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700159 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800160
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800161 # The location of the log files on the servo host for this instance.
162 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
163 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700164 # Path of the servo host lock file.
165 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
166 + self.LOCK_FILE_POSTFIX)
167 # File path to declare a reboot request.
168 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
169 + self.REBOOT_FILE_POSTFIX)
170
171 # Lock the servo host if it's an in-lab labstation to prevent other
172 # task to reboot it until current task completes. We also wait and
173 # make sure the labstation is up here, in the case of the labstation is
174 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700175 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700176 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
177 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700178 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700179
Richard Barnette9a26ad62016-06-10 12:03:08 -0700180 self._repair_strategy = (
181 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700182
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700183 def __str__(self):
184 return "<%s '%s:%s'>" % (
185 type(self).__name__, self.hostname, self.servo_port)
186
Richard Barnette9a26ad62016-06-10 12:03:08 -0700187 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700188 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700189
190 Initializes `self._servo` and then verifies that all network
191 connections are working. This will create an ssh tunnel if
192 it's required.
193
194 As a side effect of testing the connection, all signals on the
195 target servo are reset to default values, and the USB stick is
196 set to the neutral (off) position.
197 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700198 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Kuang-che Wu05763f52019-08-30 16:48:21 +0800199 self._servo = servo_obj
Richard Barnette9a26ad62016-06-10 12:03:08 -0700200 timeout, _ = retry.timeout(
201 servo_obj.initialize_dut,
202 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
203 if timeout:
204 raise hosts.AutoservVerifyError(
205 'Servo initialize timed out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700206
207
208 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700209 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700210
211 If we've previously successfully connected to our servo,
212 disconnect any established ssh tunnel, and set `self._servo`
213 back to `None`.
214 """
215 if self._servo:
216 # N.B. This call is safe even without a tunnel:
217 # rpc_server_tracker.disconnect() silently ignores
218 # unknown ports.
219 self.rpc_server_tracker.disconnect(self.servo_port)
220 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700221
Garry Wangc1288cf2019-12-17 14:58:00 -0800222
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800223 def _create_servod_server_proxy(self):
224 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700225
226 @returns: An xmlrpclib.ServerProxy that is connected to the servod
227 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700228 """
Garry Wang11b5e872020-03-11 15:14:08 -0700229 if (servo_constants.ENABLE_SSH_TUNNEL_FOR_SERVO
230 and not self.is_localhost()):
Richard Barnette9a26ad62016-06-10 12:03:08 -0700231 return self.rpc_server_tracker.xmlrpc_connect(
232 None, self.servo_port,
233 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800234 timeout_seconds=60,
Dana Goyetteafa62fd2020-03-16 13:45:27 -0700235 request_timeout_seconds=3600,
236 server_desc=str(self))
Richard Barnette9a26ad62016-06-10 12:03:08 -0700237 else:
238 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
239 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700240
241
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800242 def get_servod_server_proxy(self):
243 """Return a cached proxy if exists; otherwise, create a new one.
244
245 @returns: An xmlrpclib.ServerProxy that is connected to the servod
246 server on the host.
247 """
248 # Single-threaded execution, no race
249 if self._servod_server_proxy is None:
250 self._servod_server_proxy = self._create_servod_server_proxy()
251 return self._servod_server_proxy
252
253
Richard Barnette1edbb162016-11-01 11:47:50 -0700254 def verify(self, silent=False):
255 """Update the servo host and verify it's in a good state.
256
257 @param silent If true, suppress logging in `status.log`.
258 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700259 message = 'Beginning verify for servo host %s port %s serial %s'
260 message %= (self.hostname, self.servo_port, self.servo_serial)
261 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700262 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700263 self._repair_strategy.verify(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700264 self._servo_state = servo_constants.SERVO_STATE_WORKING
265 self.record('INFO', None, None,
266 'ServoHost verify set servo_state as WORKING')
Garry Wang63b8c382020-03-11 22:28:40 -0700267 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700268 self._servo_state = servo_constants.SERVO_STATE_BROKEN
269 self.record('INFO', None, None,
270 'ServoHost verify set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700271 if self._is_critical_error(e):
272 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700273
274
Garry Wang7b0e1b72020-03-25 19:08:59 -0700275 def get_image_name_from_usbkey(self, usbkey_dev):
276 """Mount usb drive and check ChromeOS image name on it if there is
277 one. This method assumes the image_usbkey_direction is already set
278 to servo side.
279
280 @param: usbkey_dev usbkey dev path(e.g. /dev/sdb).
281
282 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
283 or empty string if no test image detected, or unexpected
284 error occurred.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700285 """
Garry Wang70e5d062020-04-03 18:01:05 -0700286 logging.info('Checking ChromeOS image name on usbkey.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700287 usb_mount_path = '/media/servo_usb/%s' % self.servo_port
288 unmount_cmd = 'umount %s' % usb_mount_path
289 # ChromeOS root fs is in /dev/sdx3
290 mount_cmd = 'mount -o ro %s3 %s' % (usbkey_dev, usb_mount_path)
291 # Unmount if there is an existing stale mount.
292 self.run(unmount_cmd, ignore_status=True)
293 # Create if the mount point is not existing.
294 self.run('mkdir -p %s' % usb_mount_path)
295 try:
296 # Attempt to mount the usb drive.
297 mount_result = self.run(mount_cmd, ignore_status=True)
Garry Wang70e5d062020-04-03 18:01:05 -0700298 if mount_result.exit_status != 0:
299 logging.error('Unexpected error occurred on mount usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700300 return ''
Garry Wang70e5d062020-04-03 18:01:05 -0700301
302 release_content = self.run(
303 'cat %s/etc/lsb-release' % usb_mount_path,
304 ignore_status=True).stdout.strip()
305
306 if not re.search(r'RELEASE_TRACK=.*test', release_content):
307 logging.info('The image on usbkey is not a test image')
308 return ''
309
310 return lsbrelease_utils.get_chromeos_release_builder_path(
311 lsb_release_content=release_content)
Garry Wang7b0e1b72020-03-25 19:08:59 -0700312 finally:
Garry Wang70e5d062020-04-03 18:01:05 -0700313 logging.debug('Image check compeleted, unmounting the usb drive.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700314 self.run(unmount_cmd, ignore_status=True)
315
316
Garry Wang70e5d062020-04-03 18:01:05 -0700317 def _probe_and_validate_usb_dev(self):
318 """This method probe the usb dev path by talking to servo, and then
319 validate the dev path is valid block device to servohost.
320 Possible output:
321 1. Encounter error during probe usb dev, returns empty string.
322 2. probe usb dev completed without error but cannot find usb dev,
323 raise AutoservRepairError.
324 3. probe usb dev find a usb dev path, but failed validation in this
325 method, raise AutoservRepairError.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700326
Garry Wang70e5d062020-04-03 18:01:05 -0700327 @returns: A string of usb dev path(e.g. '/dev/sdb'), or empty string
328 if unexpected error occurred during probe.
329 @raises: AutoservRepairError if servo couldn't probe the usb dev path
330 (servo.probe_host_usb_dev() returns empty string), or the dev path is
331 not valid block device to servohost.
Garry Wang7b0e1b72020-03-25 19:08:59 -0700332 """
333 logging.info('Validating image usbkey on servo.')
Garry Wang7b0e1b72020-03-25 19:08:59 -0700334 try:
Garry Wang70e5d062020-04-03 18:01:05 -0700335 usb_dev = self._servo.probe_host_usb_dev()
Garry Wang7b0e1b72020-03-25 19:08:59 -0700336 except Exception as e:
337 # We don't want any unexpected or transient servo communicating
338 # failure block usb repair, so capture all errors here.
339 logging.error(e, exc_info=True)
340 logging.error('Unexpected error occurred on get usbkey dev path,'
341 ' skipping usbkey validation.')
342 return ''
343
Garry Wang70e5d062020-04-03 18:01:05 -0700344 if usb_dev:
345 # probe_host_usb_dev() sometimes return stale record,
346 # so we need to make sure the path exists in fdisk.
347 validate_cmd = 'fdisk -l | grep %s' % usb_dev
348 resp = self.run(validate_cmd, ignore_status=True)
349 if resp.exit_status == 0:
350 return usb_dev
351
352 logging.error('%s is reported from "image_usbkey_dev" control but'
353 ' not detected from fdisk!', usb_dev)
354
355 raise hosts.AutoservRepairError(
356 'No usbkey detected on servo, the usbkey may be either missing'
357 ' or broken. Please replace usbkey on the servo and retry.',
358 'missing usbkey')
359
360
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700361 def is_ec_supported(self):
Garry Wang9b8f2342020-04-17 16:34:09 -0700362 """Check if ec is supported on the servo_board"""
Otabek Kasimov4ea636e2020-04-14 23:35:06 -0700363 if self.servo_board:
364 try:
365 frm_config = config.Config(self.servo_board, self.servo_model)
366 return frm_config.chrome_ec
367 except Exception as e:
368 logging.error('Unexpected error when read from firmware'
369 ' configs; %s', str(e))
370 return False
371
Garry Wang70e5d062020-04-03 18:01:05 -0700372 def validate_image_usbkey(self):
373 """This method first validate if there is a recover usbkey on servo
374 that accessible to servohost, and second check if a ChromeOS image is
375 already on the usb drive and return the image_name so we can avoid
376 unnecessary download and flash to the recover usbkey on servo.
377
378 Please note that, there is special error handling logic here:
379 1. If unexpected error happens, we return empty string. So repair
380 actions will not get blocked.
381 2. If no working usbkey present on servo, but no errors, we'll raise
382 AutoservRepairError here.
383
384 @returns: image_name on the usbkey, e.g. nami-release/R82.10138.0.0,
385 or empty string if no test image detected, or unexpected
386 error occurred.
387 @raises: AutoservRepairError if the usbkey is not detected on servo.
388 """
389 usb_dev = self._probe_and_validate_usb_dev()
390 if usb_dev:
391 return self.get_image_name_from_usbkey(usb_dev)
392 else:
393 return ''
Garry Wang7b0e1b72020-03-25 19:08:59 -0700394
395
Richard Barnette1edbb162016-11-01 11:47:50 -0700396 def repair(self, silent=False):
397 """Attempt to repair servo host.
398
399 @param silent If true, suppress logging in `status.log`.
400 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700401 message = 'Beginning repair for servo host %s port %s serial %s'
402 message %= (self.hostname, self.servo_port, self.servo_serial)
403 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700404 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700405 self._repair_strategy.repair(self, silent)
Garry Wang11b5e872020-03-11 15:14:08 -0700406 self._servo_state = servo_constants.SERVO_STATE_WORKING
407 self.record('INFO', None, None,
408 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700409 # If target is a labstation then try to withdraw any existing
410 # reboot request created by this servo because it passed repair.
411 if self.is_labstation():
412 self.withdraw_reboot_request()
Garry Wang63b8c382020-03-11 22:28:40 -0700413 except Exception as e:
Garry Wang11b5e872020-03-11 15:14:08 -0700414 self._servo_state = servo_constants.SERVO_STATE_BROKEN
415 self.record('INFO', None, None,
416 'ServoHost repair set servo_state as BROKEN')
Garry Wang63b8c382020-03-11 22:28:40 -0700417 if self._is_critical_error(e):
418 self.disconnect_servo()
419 self.stop_servod()
420 raise
421
422
423 def _is_critical_error(self, error):
424 if (isinstance(error, hosts.AutoservVerifyDependencyError)
425 and not error.is_critical()):
426 logging.warning('Non-critical verify failure(s) detected during'
427 ' verify/repair servo, servo connection will'
428 ' still up but may not fully functional.'
429 ' Some repair actions and servo depended'
430 ' tests may not run.')
431 return False
432 logging.info('Critical verify failure(s) detected during repair/verify'
433 ' servo. Disconnecting servo and stop servod, all repair '
434 'action and tests that depends on servo will not run.')
435 return True
Fang Deng5d518f42013-08-02 14:04:32 -0700436
437
Dan Shi4d478522014-02-14 13:46:32 -0800438 def get_servo(self):
439 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700440
Dan Shi4d478522014-02-14 13:46:32 -0800441 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700442 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700443 """
Dan Shi4d478522014-02-14 13:46:32 -0800444 return self._servo
445
446
Garry Wang79e9af62019-06-12 15:19:19 -0700447 def request_reboot(self):
448 """Request servohost to be rebooted when it's safe to by touch a file.
449 """
450 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700451 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700452 self.run('touch %s' % self._reboot_file, ignore_status=True)
453
454
Garry Wang464ff1e2019-07-18 17:20:34 -0700455 def withdraw_reboot_request(self):
456 """Withdraw a servohost reboot request if exists by remove the flag
457 file.
458 """
459 logging.debug('Withdrawing request to reboot servohost %s that created'
460 ' by servo with port # %s if exists.',
461 self.hostname, self.servo_port)
462 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
463
464
Garry Wangc1288cf2019-12-17 14:58:00 -0800465 def start_servod(self, quick_startup=False):
466 """Start the servod process on servohost.
467 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800468 # Skip if running on the localhost.(crbug.com/1038168)
469 if self.is_localhost():
470 logging.debug("Servohost is a localhost, skipping start servod.")
471 return
472
473 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800474 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800475 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800476 if self.servo_model:
477 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800478 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800479 logging.warning('Board for DUT is unknown; starting servod'
480 ' assuming a pre-configured board.')
481
482 cmd += ' PORT=%d' % self.servo_port
483 if self.servo_serial:
484 cmd += ' SERIAL=%s' % self.servo_serial
Garry Wangd7367482020-02-27 13:52:40 -0800485
486 # Start servod with dual_v4 if the DUT/servo from designated pools.
487 dut_host_info = self.get_dut_host_info()
488 if dut_host_info:
Garry Wang11b5e872020-03-11 15:14:08 -0700489 if bool(dut_host_info.pools &
490 servo_constants.POOLS_SUPPORT_DUAL_V4):
Garry Wangd7367482020-02-27 13:52:40 -0800491 logging.debug('The DUT is detected in following designated'
492 ' pools %s,starting servod with DUAL_V4 option.',
Garry Wang11b5e872020-03-11 15:14:08 -0700493 servo_constants.POOLS_SUPPORT_DUAL_V4)
Garry Wangd7367482020-02-27 13:52:40 -0800494 cmd += ' DUAL_V4=1'
495
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800496 # Remove the symbolic links from the logs. This helps ensure that
497 # a failed servod instantiation does not cause us to grab old logs
498 # by mistake.
499 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800500 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800501
502 # There's a lag between when `start servod` completes and when
503 # the _ServodConnectionVerifier trigger can actually succeed.
504 # The call to time.sleep() below gives time to make sure that
505 # the trigger won't fail after we return.
506
507 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
508 # But in the rare case all servo on a labstation are in heavy use they
509 # may take ~30 seconds. So the timeout value will double these value,
510 # and we'll try quick start up when first time initialize servohost,
511 # and use standard start up timeout in repair.
512 if quick_startup:
Garry Wang11b5e872020-03-11 15:14:08 -0700513 timeout = servo_constants.SERVOD_QUICK_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800514 else:
Garry Wang11b5e872020-03-11 15:14:08 -0700515 timeout = servo_constants.SERVOD_STARTUP_TIMEOUT
Garry Wangc1288cf2019-12-17 14:58:00 -0800516 logging.debug('Wait %s seconds for servod process fully up.', timeout)
517 time.sleep(timeout)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700518 # Cache the initial instance timestamp to check against servod restarts
519 self._initial_instance_ts = self.get_instance_logs_ts()
Garry Wangc1288cf2019-12-17 14:58:00 -0800520
521
522 def stop_servod(self):
523 """Stop the servod process on servohost.
524 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800525 # Skip if running on the localhost.(crbug.com/1038168)
526 if self.is_localhost():
527 logging.debug("Servohost is a localhost, skipping stop servod.")
528 return
529
Garry Wangc1288cf2019-12-17 14:58:00 -0800530 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800531 self.run('stop servod PORT=%d' % self.servo_port,
532 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800533 logging.debug('Wait %s seconds for servod process fully teardown.',
Garry Wang11b5e872020-03-11 15:14:08 -0700534 servo_constants.SERVOD_TEARDOWN_TIMEOUT)
535 time.sleep(servo_constants.SERVOD_TEARDOWN_TIMEOUT)
Garry Wangc1288cf2019-12-17 14:58:00 -0800536
537
538 def restart_servod(self, quick_startup=False):
539 """Restart the servod process on servohost.
540 """
541 self.stop_servod()
542 self.start_servod(quick_startup)
543
Garry Wang11b5e872020-03-11 15:14:08 -0700544
Garry Wangffbd2162020-04-17 16:13:48 -0700545 def _process_servodtool_error(self, response):
546 """Helper function to handle non-zero servodtool response.
547 """
548 if re.search(servo_constants.ERROR_MESSAGE_USB_HUB_NOT_COMPATIBLE,
Garry Wangad245002020-05-15 15:20:23 -0700549 response.stdout):
Garry Wangffbd2162020-04-17 16:13:48 -0700550 logging.error('The servo is not plugged on a usb hub that supports'
551 ' power-cycle!')
Garry Wang000c6c02020-05-11 21:27:23 -0700552 # change the flag so we can update this label in later process.
553 self.smart_usbhub = False
Garry Wangffbd2162020-04-17 16:13:48 -0700554 return
555
556 if re.search(servo_constants.ERROR_MESSAGE_DEVICE_NOT_FOUND %
557 self.servo_serial, response.stdout):
558 logging.error('No servo with serial %s found!', self.servo_serial)
559 return
560
561 logging.error('Unexpected error occurred from usbhub control, please'
562 ' file a bug and inform chrome-fleet-software@ team!')
563
564
565 def _get_servo_usb_devnum(self):
566 """Helper function to collect current usb devnum of servo.
567 """
Otabek Kasimov09192682020-06-01 18:17:44 -0700568 # TODO remove try-except when fix crbug.com/1087964
569 try:
570 cmd = 'servodtool device -s %s usb-path' % self.servo_serial
571 resp = self.run(cmd, ignore_status=True, timeout=30)
572 except Exception as e:
573 # Here we catch only timeout errors.
574 # Other errors is filtered by ignore_status=True
575 logging.debug('Attempt to get servo usb-path failed due to '
576 'timeout; %s', e)
577 return ''
578
Garry Wangffbd2162020-04-17 16:13:48 -0700579 if resp.exit_status != 0:
580 self._process_servodtool_error(resp)
581 return ''
582 usb_path = resp.stdout.strip()
583 logging.info('Usb path of servo %s is %s', self.servo_serial, usb_path)
584
585 resp = self.run('cat %s/devnum' % usb_path,
586 ignore_status=True)
587 if resp.exit_status != 0:
588 self._process_servodtool_error(resp)
589 return ''
590 return resp.stdout.strip()
591
592
593 def _reset_servo(self):
594 logging.info('Resetting servo through smart usbhub.')
Otabek Kasimov09192682020-06-01 18:17:44 -0700595 # TODO remove try-except when fix crbug.com/1087964
596 try:
597 resp = self.run('servodtool device -s %s power-cycle' %
598 self.servo_serial, ignore_status=True,
599 timeout=30)
600 if resp.exit_status != 0:
601 self._process_servodtool_error(resp)
602 return False
603 except Exception as e:
604 # Here we catch only timeout errors.
605 # Other errors is filtered by ignore_status=True
606 logging.debug('Attempt to reset servo failed due to timeout;'
607 ' %s', e)
Garry Wangffbd2162020-04-17 16:13:48 -0700608 return False
609
610 logging.debug('Wait %s seconds for servo to come back from reset.',
611 servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
612 time.sleep(servo_constants.SERVO_RESET_TIMEOUT_SECONDS)
Garry Wang000c6c02020-05-11 21:27:23 -0700613 # change the flag so we can update this label in later process.
614 self.smart_usbhub = True
Garry Wangffbd2162020-04-17 16:13:48 -0700615 return True
616
617
618 def reset_servo(self):
619 """Reset(power-cycle) the servo via smart usbhub.
620 """
621 if not self.is_labstation():
622 logging.info('Servo reset is not applicable to servo_v3.')
623 return
624
625 pre_reset_devnum = self._get_servo_usb_devnum()
626 logging.info('Servo usb devnum before reset: %s', pre_reset_devnum)
627 result = self._reset_servo()
628 if not result:
629 message = ('Failed to reset servo with serial: %s'
630 % self.servo_serial)
631 logging.warning(message)
632 self.record('INFO', None, None, message)
633 return
634
635 post_reset_devnum = self._get_servo_usb_devnum()
636 logging.info('Servo usb devnum after reset: %s', post_reset_devnum)
637 if not (pre_reset_devnum and post_reset_devnum):
638 message = ('Servo reset completed but unable to verify'
639 ' devnum change!')
640 elif pre_reset_devnum != post_reset_devnum:
641 message = ('Reset servo with serial %s completed successfully!'
642 % self.servo_serial)
643 else:
644 message = 'Servo reset completed but devnum is still not changed!'
645 logging.info(message)
646 self.record('INFO', None, None, message)
647
648
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800649 def _extract_compressed_logs(self, logdir, relevant_files):
650 """Decompress servod logs in |logdir|.
651
652 @param logdir: directory containing compressed servod logs.
653 @param relevant_files: list of files in |logdir| to consider.
654
655 @returns: tuple, (tarfiles, files) where
656 tarfiles: list of the compressed filenames that have been
657 extracted and deleted
658 files: list of the uncompressed files that were generated
659 """
660 # For all tar-files, first extract them to the directory, and
661 # then let the common flow handle them.
662 tarfiles = [cf for cf in relevant_files if
663 cf.endswith(self.COMPRESSION_SUFFIX)]
664 files = []
665 for f in tarfiles:
666 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
667 with tarfile.open(f) as tf:
668 # Each tarfile has only one member, as
669 # that's the compressed log.
670 member = tf.members[0]
671 # Manipulate so that it only extracts the basename, and not
672 # the directories etc.
673 member.name = norm_name
674 files.append(os.path.join(logdir, member.name))
675 tf.extract(member, logdir)
676 # File has been extracted: remove the compressed file.
677 os.remove(f)
678 return tarfiles, files
679
680 def _extract_mcu_logs(self, log_subdir):
681 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
682
683 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
684 lines from the logs to generate invidiual console logs e.g. after
685 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
686 those MCUs had any console input/output.
687
688 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
689 """
690 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
691 # files
692 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
693 if not os.path.exists(mcu_lines_file):
694 logging.info('No DEBUG logs found to extract MCU logs from.')
695 return
696 mcu_files = {}
697 mcu_file_template = '%s.txt'
698 with open(mcu_lines_file, 'r') as f:
699 for line in f:
700 match = self.MCU_EXTRACTOR.match(line)
701 if match:
702 mcu = match.group(self.MCU_GROUP).lower()
703 line = match.group(self.LINE_GROUP)
704 if mcu not in mcu_files:
705 mcu_file = os.path.join(log_subdir,
706 mcu_file_template % mcu)
707 mcu_files[mcu] = open(mcu_file, 'a')
708 fd = mcu_files[mcu]
709 fd.write(line + '\n')
710 for f in mcu_files:
711 mcu_files[f].close()
712
713
714 def remove_latest_log_symlinks(self):
715 """Remove the conveninence symlinks 'latest' servod logs."""
716 symlink_wildcard = '%s/latest*' % self.remote_log_dir
717 cmd = 'rm ' + symlink_wildcard
718 self.run(cmd, stderr_tee=None, ignore_status=True)
719
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700720 def probe_servod_restart(self, instance_ts, outdir):
721 """Grab servod logs from previous instances if part of this session.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800722
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700723 If since the last time this host called start_servod() servod crashed
724 and restarted, this helper finds those logs as well, and stores them
725 with the |OLD_LOG_SUFFIX| to investigate if necessary.
Prasad Vuppalapu5bd9da12020-03-31 01:46:47 +0000726
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700727 It also issues a panicinfo command to servo devices after the restart
728 to try and collect reboot information for debugging.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800729
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700730 @param instance_ts: the log timestamp that the current instance uses
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800731 @param outdir: directory to create a subdirectory into to place the
732 servod logs into.
733 """
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700734 if self._initial_instance_ts is None:
735 logging.info('No log timestamp grabbed successfully on servod '
736 'startup. Cannot check device restarts. Ignoring.')
737 return
738 if instance_ts == self._initial_instance_ts:
739 logging.debug('Servod appears to have run without restarting')
740 return
741 # Servod seems to have restarted (at least once). |_initial_instance_ts|
742 # is the first timestamp, and instance_ts is the current timestamp. Find
743 # all timestamps in between them, and grab the logs for each.
744 tss = self._find_instance_timestamps_between(self._initial_instance_ts,
745 instance_ts)
746 logging.info('Servod has restarted %d times between the start and the '
747 'end of this servo_host.', len(tss))
748 logging.info('This might be an issue. Will extract all logs from each '
749 'instance.')
750 logging.info('Logs that are not the currently running (about to turn '
751 'down) instance are maked with a .%s in their folder.',
752 self.OLD_LOG_SUFFIX)
753 for ts in tss:
754 self.get_instance_logs(ts, outdir, old=True)
755 # Lastly, servod has restarted due to a potential issue. Try to get
756 # panic information from servo micro and servo v4 for the current logs.
757 # This can only happen if the |_servo| attribute is initialized.
758 if self._servo:
759 for mcu in ['servo_micro', 'servo_v4']:
760 ctrl = '%s_uart_cmd' % mcu
761 if self._servo.has_control(ctrl):
762 logging.info('Trying to retrieve %r panicinfo into logs',
763 mcu)
764 try:
765 self._servo.set_nocheck(ctrl, 'panicinfo')
766 except error.TestFail as e:
767 logging.error('Failed to generate panicinfo for %r '
768 'logs. %s', mcu, str(e))
769
770 def _find_instance_timestamps_between(self, start_ts, end_ts):
771 """Find all log timestamps between [start_ts, end_ts).
772
773 @param start_ts: str, earliest log timestamp of interest
774 @param end_ts: str, latest log timestamp of interest
775
776 @returns: list, all timestamps between start_ts and end_ts, end_ts
777 exclusive, on the servo_host. An empty list on errors
778 """
779 # Simply get all timestamp, and then sort and remove
780 cmd = 'ls %s' % self.remote_log_dir
781 res = self.run(cmd, stderr_tee=None, ignore_status=True)
782 if res.exit_status != 0:
783 # Here we failed to find anything.
784 logging.info('Failed to find remote servod logs. Ignoring.')
785 return []
786 logfiles = res.stdout.strip().split()
787 timestamps = set()
788 for logfile in logfiles:
789 ts_match = self.TS_EXTRACTOR.match(logfile)
790 if not ts_match:
791 # Simply ignore files that fail the check. It might be the
792 # 'latest' symlinks or random files.
793 continue
794 timestamps.add(ts_match.group(self.TS_GROUP))
795 # At this point we have all unique timestamps.
796 timestamps = sorted(timestamps)
797 for ts in [start_ts, end_ts]:
798 if ts not in timestamps:
799 logging.error('Timestamp %r not in servod logs. Cannot query '
800 'for timestamps in between %r and %r', ts,
801 start_ts, end_ts)
802 return []
803 return timestamps[timestamps.index(start_ts):timestamps.index(end_ts)]
804
805 def get_instance_logs_ts(self):
806 """Retrieve the currently running servod instance's log timestamp
807
808 @returns: str, timestamp for current instance, or None on failure
809 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800810 # First, extract the timestamp. This cmd gives the real filename of
811 # the latest aka current log file.
812 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
813 'then realpath %(dir)s/latest.DEBUG;'
814 'elif [ -f %(dir)s/latest ];'
815 'then realpath %(dir)s/latest;'
816 'else exit %(code)d;'
817 'fi' % {'dir': self.remote_log_dir,
818 'code': self.NO_SYMLINKS_CODE})
819 res = self.run(cmd, stderr_tee=None, ignore_status=True)
820 if res.exit_status != 0:
821 if res.exit_status == self.NO_SYMLINKS_CODE:
822 logging.warning('servod log latest symlinks not found. '
823 'This is likely due to an error starting up '
824 'servod. Ignoring..')
825 else:
826 logging.warning('Failed to find servod logs on servo host.')
827 logging.warning(res.stderr.strip())
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700828 return None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800829 fname = os.path.basename(res.stdout.strip())
830 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
Ruben Rodriguez Buchillone9aa2b02020-03-04 12:14:28 -0800831 ts_match = self.TS_EXTRACTOR.match(fname)
832 if not ts_match:
833 logging.warning('Failed to extract timestamp from servod log file '
834 '%r. Skipping. The servo host is using outdated '
835 'servod logging and needs to be updated.', fname)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700836 return None
837 return ts_match.group(self.TS_GROUP)
838
839 def get_instance_logs(self, instance_ts, outdir, old=False):
840 """Collect all logs with |instance_ts| and dump into a dir in |outdir|
841
842 This method first collects all logs on the servo_host side pertaining
843 to this servod instance (port, instatiation). It glues them together
844 into combined log.[level].txt files and extracts all available MCU
845 console I/O from the logs into individual files e.g. servo_v4.txt
846
847 All the output can be found in a directory inside |outdir| that
848 this generates based on |LOG_DIR|, the servod port, and the instance
849 timestamp on the servo_host side.
850
851 @param instance_ts: log timestamp to grab logfiles for
852 @param outdir: directory to create a subdirectory into to place the
853 servod logs into.
854 @param old: bool, whether to append |OLD_LOG_SUFFIX| to output dir
855 """
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800856 # Create the local results log dir.
857 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
858 str(self.servo_port),
859 instance_ts))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700860 if old:
861 log_dir = '%s.%s' % (log_dir, self.OLD_LOG_SUFFIX)
862 logging.info('Saving servod logs to %r.', log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800863 os.mkdir(log_dir)
864 # Now, get all files with that timestamp.
865 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
866 instance_ts)
867 res = self.run(cmd, stderr_tee=None, ignore_status=True)
868 files = res.stdout.strip().split()
869 try:
870 self.get_file(files, log_dir, try_rsync=False)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700871 if not os.listdir(log_dir):
872 logging.info('No servod logs retrieved. Ignoring, and removing '
873 '%r again.', log_dir)
874 os.rmdir(log_dir)
875 return
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800876 except error.AutoservRunError as e:
877 result = e.result_obj
878 if result.exit_status != 0:
879 stderr = result.stderr.strip()
880 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
881 stderr or '\n%s' % result)
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700882 # Remove the log_dir as nothing was added to it.
883 os.rmdir(log_dir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800884 return
885 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
886 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
887 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
888 # Create the joint files for each loglevel. i.e log.DEBUG
889 joint_file = self.JOINT_LOG_PREFIX
890 if level_name:
891 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
892 # This helps with some online tools to avoid complaints about an
893 # unknown filetype.
894 joint_file = joint_file + '.txt'
895 joint_path = os.path.join(log_dir, joint_file)
896 files = [f for f in local_files if level_name in f]
897 if not files:
898 # TODO(crrev.com/c/1793030): remove no-level case once CL
899 # is pushed
900 continue
901 # Extract compressed logs if any.
902 compressed, extracted = self._extract_compressed_logs(log_dir,
903 files)
904 files = list(set(files) - set(compressed))
905 files.extend(extracted)
906 # Need to sort. As they all share the same timestamp, and
907 # loglevel, the index itself is sufficient. The highest index
908 # is the oldest file, therefore we need a descending sort.
909 def sortkey(f, level=level_name):
910 """Custom sortkey to sort based on rotation number int."""
911 if f.endswith(level_name): return 0
912 return int(f.split('.')[-1])
913
914 files.sort(reverse=True, key=sortkey)
915 # Just rename the first file rather than building from scratch.
916 os.rename(files[0], joint_path)
917 with open(joint_path, 'a') as joint_f:
918 for logfile in files[1:]:
919 # Transfer the file to the joint file line by line.
920 with open(logfile, 'r') as log_f:
921 for line in log_f:
922 joint_f.write(line)
923 # File has been written over. Delete safely.
924 os.remove(logfile)
925 # Need to remove all files form |local_files| so we don't
926 # analyze them again.
927 local_files = list(set(local_files) - set(files) - set(compressed))
928 # Lastly, extract MCU logs from the joint logs.
929 self._extract_mcu_logs(log_dir)
930
Garry Wang79e9af62019-06-12 15:19:19 -0700931 def _lock(self):
932 """lock servohost by touching a file.
933 """
934 logging.debug('Locking servohost %s by touching %s file',
935 self.hostname, self._lock_file)
936 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700937 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700938
939
940 def _unlock(self):
941 """Unlock servohost by removing the lock file.
942 """
943 logging.debug('Unlocking servohost by removing %s file',
944 self._lock_file)
945 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700946 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700947
948
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700949 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -0700950 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700951 # NOTE: throughout this method there are multiple attempts to catch
952 # all errors. This is WAI as log grabbing should not fail tests.
953 # However, the goal is to catch and handle/process all errors, thus
954 # we print the traceback and ask for a bug.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800955 if self._closed:
956 logging.debug('ServoHost is already closed.')
957 return
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700958 instance_ts = self.get_instance_logs_ts()
959 # TODO(crbug.com/1011516): once enabled, remove the check against
960 # localhost and instead check against log-rotiation enablement.
961 logs_available = (instance_ts is not None and
962 self.job and
963 not self.is_localhost())
964 if logs_available:
965 # Probe whether there was a servod restart, and grab those old
966 # logs as well.
967 try:
968 self.probe_servod_restart(instance_ts, self.job.resultdir)
969 except (error.AutoservRunError, error.TestFail) as e:
970 logging.info('Failed to grab servo logs due to: %s. '
971 'This error is forgiven.', str(e))
972 except Exception as e:
973 logging.error('Unexpected error probing for old logs. %s. '
974 'Forgiven. Please file a bug and fix or catch '
975 'in log probing function', str(e),
976 exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700977 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800978 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -0700979 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800980 self._servo.close(outdir)
981
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700982 if logs_available:
983 # Grab current (not old like above) logs after the servo instance
984 # was closed out.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800985 try:
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700986 self.get_instance_logs(instance_ts, self.job.resultdir)
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800987 except error.AutoservRunError as e:
988 logging.info('Failed to grab servo logs due to: %s. '
989 'This error is forgiven.', str(e))
Ruben Rodriguez Buchillon5bac3062020-03-25 21:32:58 -0700990 except Exception as e:
991 logging.error('Unexpected error grabbing servod logs. %s. '
992 'Forgiven. Please file a bug and fix or catch '
993 'in log grabbing function', str(e), exc_info=True)
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700994
Garry Wang7c00b0f2019-06-25 17:28:17 -0700995 if self._is_locked:
996 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -0700997 try:
998 self._unlock()
999 except error.AutoservSSHTimeout:
1000 logging.error('Unlock servohost failed due to ssh timeout.'
1001 ' It may caused by servohost went down during'
1002 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -08001003 # We want always stop servod after task to minimum the impact of bad
1004 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -08001005 try:
1006 self.stop_servod()
1007 except error.AutoservRunError as e:
1008 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001009 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -08001010
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001011 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -08001012 # Mark closed.
1013 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -07001014
1015
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001016 def get_servo_state(self):
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001017 if self._servo_state is None:
Garry Wang11b5e872020-03-11 15:14:08 -07001018 return servo_constants.SERVO_STATE_UNKNOWN
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001019 return self._servo_state
Otabek Kasimovcc9738e2020-02-14 16:17:15 -08001020
1021
Richard Barnetteea3e4602016-06-10 12:36:41 -07001022def make_servo_hostname(dut_hostname):
1023 """Given a DUT's hostname, return the hostname of its servo.
1024
1025 @param dut_hostname: hostname of a DUT.
1026
1027 @return hostname of the DUT's servo.
1028
1029 """
1030 host_parts = dut_hostname.split('.')
1031 host_parts[0] = host_parts[0] + '-servo'
1032 return '.'.join(host_parts)
1033
1034
1035def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001036 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001037
1038 @param servo_hostname: hostname of the servo host.
1039
1040 @return True if it's up, False otherwise
1041 """
1042 # Technically, this duplicates the SSH ping done early in the servo
1043 # proxy initialization code. However, this ping ends in a couple
1044 # seconds when if fails, rather than the 60 seconds it takes to decide
1045 # that an SSH ping has timed out. Specifically, that timeout happens
1046 # when our servo DNS name resolves, but there is no host at that IP.
1047 logging.info('Pinging servo host at %s', servo_hostname)
1048 ping_config = ping_runner.PingConfig(
1049 servo_hostname, count=3,
1050 ignore_result=True, ignore_status=True)
1051 return ping_runner.PingRunner().ping(ping_config).received > 0
1052
1053
Richard Barnettee519dcd2016-08-15 17:37:17 -07001054def _map_afe_board_to_servo_board(afe_board):
1055 """Map a board we get from the AFE to a servo appropriate value.
1056
1057 Many boards are identical to other boards for servo's purposes.
1058 This function makes that mapping.
1059
1060 @param afe_board string board name received from AFE.
1061 @return board we expect servo to have.
1062
1063 """
1064 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
1065 BOARD_MAP = {'gizmo': 'panther'}
1066 mapped_board = afe_board
1067 if afe_board in BOARD_MAP:
1068 mapped_board = BOARD_MAP[afe_board]
1069 else:
1070 for suffix in KNOWN_SUFFIXES:
1071 if afe_board.endswith(suffix):
1072 mapped_board = afe_board[0:-len(suffix)]
1073 break
1074 if mapped_board != afe_board:
1075 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
1076 return mapped_board
1077
1078
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001079def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001080 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001081
Richard Barnetteea3e4602016-06-10 12:36:41 -07001082 @param dut_host Instance of `Host` on which to find the servo
1083 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -07001084 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001085 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001086 info = dut_host.host_info_store.get()
1087 servo_args = {k: v for k, v in info.attributes.iteritems()
Garry Wang11b5e872020-03-11 15:14:08 -07001088 if k in servo_constants.SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -07001089
Garry Wang11b5e872020-03-11 15:14:08 -07001090 if servo_constants.SERVO_PORT_ATTR in servo_args:
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001091 try:
Garry Wang11b5e872020-03-11 15:14:08 -07001092 servo_args[servo_constants.SERVO_PORT_ATTR] = int(
1093 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001094 except ValueError:
1095 logging.error('servo port is not an int: %s',
Garry Wang11b5e872020-03-11 15:14:08 -07001096 servo_args[servo_constants.SERVO_PORT_ATTR])
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001097 # Reset servo_args because we don't want to use an invalid port.
Garry Wang11b5e872020-03-11 15:14:08 -07001098 servo_args.pop(servo_constants.SERVO_HOST_ATTR, None)
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001099
1100 if info.board:
Garry Wang11b5e872020-03-11 15:14:08 -07001101 servo_board = _map_afe_board_to_servo_board(info.board)
1102 servo_args[servo_constants.SERVO_BOARD_ATTR] = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -07001103 if info.model:
Garry Wang11b5e872020-03-11 15:14:08 -07001104 servo_args[servo_constants.SERVO_MODEL_ATTR] = info.model
1105 return servo_args if servo_constants.SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -07001106
1107
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001108def _tweak_args_for_ssp_moblab(servo_args):
Garry Wang11b5e872020-03-11 15:14:08 -07001109 if (servo_args[servo_constants.SERVO_HOST_ATTR]
1110 in ['localhost', '127.0.0.1']):
1111 servo_args[servo_constants.SERVO_HOST_ATTR] = _CONFIG.get_config_value(
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001112 'SSP', 'host_container_ip', type=str, default=None)
1113
1114
Dan Shi023aae32016-05-25 11:13:01 -07001115def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001116 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001117 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -08001118
Richard Barnette9a26ad62016-06-10 12:03:08 -07001119 This function attempts to create and verify or repair a `ServoHost`
1120 object for a servo connected to the given `dut`, subject to various
1121 constraints imposed by the parameters:
1122 * When the `servo_args` parameter is not `None`, a servo
1123 host must be created, and must be checked with `repair()`.
1124 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
1125 true:
1126 * If `try_servo_repair` is true, then create a servo host and
1127 check it with `repair()`.
1128 * Otherwise, if the servo responds to `ping` then create a
1129 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -08001130
Richard Barnette9a26ad62016-06-10 12:03:08 -07001131 In cases where `servo_args` was not `None`, repair failure
1132 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001133 are logged and then discarded. Note that this only happens in cases
1134 where we're called from a test (not special task) control file that
1135 has an explicit dependency on servo. In that case, we require that
1136 repair not write to `status.log`, so as to avoid polluting test
1137 results.
1138
1139 TODO(jrbarnette): The special handling for servo in test control
1140 files is a thorn in my flesh; I dearly hope to see it cut out before
1141 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001142
1143 Parameters for a servo host consist of a host name, port number, and
1144 DUT board, and are determined from one of these sources, in order of
1145 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -07001146 * Servo attributes from the `dut` parameter take precedence over
1147 all other sources of information.
1148 * If a DNS entry for the servo based on the DUT hostname exists in
1149 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -07001150 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001151 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -07001152 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -07001153
1154 @param dut An instance of `Host` from which to take
1155 servo parameters (if available).
1156 @param servo_args A dictionary with servo parameters to use if
1157 they can't be found from `dut`. If this
1158 argument is supplied, unrepaired exceptions
1159 from `verify()` will be passed back to the
1160 caller.
1161 @param try_lab_servo If not true, servo host creation will be
1162 skipped unless otherwise required by the
1163 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -07001164 @param try_servo_repair If true, check a servo host with
1165 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -08001166
1167 @returns: A ServoHost object or None. See comments above.
1168
1169 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001170 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001171 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -07001172 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -07001173 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -07001174 if utils.in_moblab_ssp():
1175 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001176 logging.debug(
1177 'Overriding provided servo_args (%s) with arguments'
1178 ' determined from the host (%s)',
1179 servo_args,
1180 servo_args_override,
1181 )
Richard Barnetteea3e4602016-06-10 12:36:41 -07001182 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -07001183
Richard Barnetteea3e4602016-06-10 12:36:41 -07001184 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001185 logging.debug('No servo_args provided, and failed to find overrides.')
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001186 if try_lab_servo or servo_dependency:
Garry Wang11b5e872020-03-11 15:14:08 -07001187 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001188 else:
1189 # For regular test case which not required the servo
1190 return None, None
1191
Garry Wang11b5e872020-03-11 15:14:08 -07001192 servo_hostname = servo_args.get(servo_constants.SERVO_HOST_ATTR)
1193 servo_port = servo_args.get(servo_constants.SERVO_PORT_ATTR)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001194 if not _is_servo_host_information_exist(servo_hostname, servo_port):
1195 logging.debug(
1196 'Servo connection info missed hostname: %s , port: %s',
1197 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001198 return None, servo_constants.SERVO_STATE_NOT_CONNECTED
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001199 if not is_servo_host_information_valid(servo_hostname, servo_port):
1200 logging.debug(
1201 'Servo connection info is incorrect hostname: %s , port: %s',
1202 servo_hostname, servo_port)
Garry Wang11b5e872020-03-11 15:14:08 -07001203 return None, servo_constants.SERVO_STATE_WRONG_CONFIG
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001204 if (not servo_dependency and not try_servo_repair and
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001205 not servo_host_is_up(servo_hostname)):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001206 logging.debug('ServoHost is not up.')
Garry Wang11b5e872020-03-11 15:14:08 -07001207 return None, servo_constants.SERVO_STATE_BROKEN
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001208
Garry Wangebc015b2019-06-06 17:45:06 -07001209 newhost = ServoHost(**servo_args)
Garry Wangffbd2162020-04-17 16:13:48 -07001210
1211 # Reset servo if the servo is locked, as we check if the servohost is up,
1212 # if the servohost is labstation and if the servohost is in lab inside the
1213 # locking logic. Also check try_servo_repair to make sure we only do this
1214 # in AdminRepair tasks.
1215 if newhost._is_locked and try_servo_repair:
1216 newhost.reset_servo()
1217
Garry Wangcdd27b22020-01-13 14:59:11 -08001218 try:
1219 newhost.restart_servod(quick_startup=True)
1220 except error.AutoservSSHTimeout:
1221 logging.warning("Restart servod failed due ssh connection "
1222 "to servohost timed out. This error is forgiven"
1223 " here, we will retry in servo repair process.")
1224 except error.AutoservRunError as e:
1225 logging.warning("Restart servod failed due to:\n%s\n"
1226 "This error is forgiven here, we will retry"
1227 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -07001228
Gregory Nisbetde13e2a2019-12-09 22:44:00 -08001229 # TODO(gregorynisbet): Clean all of this up.
1230 logging.debug('create_servo_host: attempt to set info store on '
1231 'servo host')
1232 try:
1233 if dut_host_info is None:
1234 logging.debug('create_servo_host: dut_host_info is '
1235 'None, skipping')
1236 else:
1237 newhost.set_dut_host_info(dut_host_info)
1238 logging.debug('create_servo_host: successfully set info '
1239 'store')
1240 except Exception:
1241 logging.error("create_servo_host: (%s)", traceback.format_exc())
1242
Richard Barnette9a26ad62016-06-10 12:03:08 -07001243 # Note that the logic of repair() includes everything done
1244 # by verify(). It's sufficient to call one or the other;
1245 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -07001246 if servo_dependency:
1247 newhost.repair(silent=True)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001248 return newhost, newhost.get_servo_state()
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001249
1250 if try_servo_repair:
1251 try:
1252 newhost.repair()
1253 except Exception:
1254 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -07001255 else:
1256 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001257 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -07001258 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -07001259 logging.exception('servo verify failed for %s', newhost.hostname)
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001260 return newhost, newhost.get_servo_state()
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001261
1262
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001263def _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001264 if hostname is None or len(hostname.strip()) == 0:
1265 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001266 if port is None:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001267 return False
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001268 if not type(port) is int:
1269 try:
1270 int(port)
1271 except ValueError:
1272 return False
1273
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001274 return True
1275
1276
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001277def is_servo_host_information_valid(hostname, port):
Garry Wang9b8f2342020-04-17 16:34:09 -07001278 """Check if provided servo attributes are valid.
1279
1280 @param hostname Hostname of the servohost.
1281 @param port servo port number.
1282
1283 @returns: A bool value to indicate if provided servo attribute valid.
1284 """
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001285 if not _is_servo_host_information_exist(hostname, port):
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001286 return False
1287 # checking range and correct of the port
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001288 port_int = int(port)
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001289 if port_int < 1 or port_int > 65000:
1290 return False
1291 # we expecting host contain only latters, digits and '-' or '_'
Otabek Kasimova7ba91a2020-03-09 08:31:01 -07001292 if not re.match('[a-zA-Z0-9-_\.]*$', hostname) or len(hostname) < 5:
Otabek Kasimov7267a7a2020-03-04 11:18:45 -08001293 return False
1294 return True