blob: 68e10479f30a6055e6aca0977100c2673d8fa24a [file] [log] [blame]
Fang Deng5d518f42013-08-02 14:04:32 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4#
5# Expects to be run in an environment with sudo and no interactive password
6# prompt, such as within the Chromium OS development chroot.
7
8
9"""This file provides core logic for servo verify/repair process."""
10
11
Fang Deng5d518f42013-08-02 14:04:32 -070012import logging
Raul E Rangel52ca2e82018-07-03 14:10:14 -060013import os
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080014import re
15import tarfile
Garry Wangc1288cf2019-12-17 14:58:00 -080016import time
Gregory Nisbet265a52c2019-12-10 20:38:42 -080017import traceback
Dana Goyette4dc0adc2019-05-06 14:51:53 -070018import xmlrpclib
Fang Deng5d518f42013-08-02 14:04:32 -070019
20from autotest_lib.client.bin import utils
Garry Wang79e9af62019-06-12 15:19:19 -070021from autotest_lib.client.common_lib import error
beeps5e8c45a2013-12-17 22:05:11 -080022from autotest_lib.client.common_lib import global_config
Richard Barnette9a26ad62016-06-10 12:03:08 -070023from autotest_lib.client.common_lib import hosts
Fang Deng5d518f42013-08-02 14:04:32 -070024from autotest_lib.client.common_lib.cros import retry
Christopher Wileycef1f902014-06-19 11:11:23 -070025from autotest_lib.client.common_lib.cros.network import ping_runner
Richard Barnette9a26ad62016-06-10 12:03:08 -070026from autotest_lib.server.cros.servo import servo
Richard Barnetted31580e2018-05-14 19:58:00 +000027from autotest_lib.server.hosts import servo_repair
Garry Wangebc015b2019-06-06 17:45:06 -070028from autotest_lib.server.hosts import base_servohost
Dan Shi5e2efb72017-02-07 11:40:23 -080029
Fang Deng5d518f42013-08-02 14:04:32 -070030
Simran Basi0739d682015-02-25 16:22:56 -080031# Names of the host attributes in the database that represent the values for
32# the servo_host and servo_port for a servo connected to the DUT.
33SERVO_HOST_ATTR = 'servo_host'
34SERVO_PORT_ATTR = 'servo_port'
Richard Barnettee519dcd2016-08-15 17:37:17 -070035SERVO_BOARD_ATTR = 'servo_board'
Nick Sanders2f3c9852018-10-24 12:10:24 -070036# Model is inferred from host labels.
37SERVO_MODEL_ATTR = 'servo_model'
Kevin Cheng643ce8a2016-09-15 15:42:12 -070038SERVO_SERIAL_ATTR = 'servo_serial'
Prathmesh Prabhucba44292018-08-28 17:44:45 -070039SERVO_ATTR_KEYS = (
40 SERVO_BOARD_ATTR,
41 SERVO_HOST_ATTR,
42 SERVO_PORT_ATTR,
43 SERVO_SERIAL_ATTR,
44)
Simran Basi0739d682015-02-25 16:22:56 -080045
Garry Wangc1288cf2019-12-17 14:58:00 -080046# Timeout value for stop/start servod process.
47SERVOD_TEARDOWN_TIMEOUT = 3
48SERVOD_QUICK_STARTUP_TIMEOUT = 20
49SERVOD_STARTUP_TIMEOUT = 60
50
Dan Shi3b2adf62015-09-02 17:46:54 -070051_CONFIG = global_config.global_config
xixuan6cf6d2f2016-01-29 15:29:00 -080052ENABLE_SSH_TUNNEL_FOR_SERVO = _CONFIG.get_config_value(
53 'CROS', 'enable_ssh_tunnel_for_servo', type=bool, default=False)
Simran Basi0739d682015-02-25 16:22:56 -080054
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -070055AUTOTEST_BASE = _CONFIG.get_config_value(
56 'SCHEDULER', 'drone_installation_directory',
57 default='/usr/local/autotest')
58
Otabek Kasimovcc9738e2020-02-14 16:17:15 -080059SERVO_STATE_LABEL_PREFIX = 'servo_state'
60SERVO_STATE_WORKING = 'WORKING'
61SERVO_STATE_BROKEN = 'BROKEN'
62
Fang Deng5d518f42013-08-02 14:04:32 -070063
Garry Wangebc015b2019-06-06 17:45:06 -070064class ServoHost(base_servohost.BaseServoHost):
65 """Host class for a servo host(e.g. beaglebone, labstation)
Dana Goyette0b6e6402019-10-04 11:09:24 -070066 that with a servo instance for a specific port.
67
68 @type _servo: servo.Servo | None
69 """
Fang Deng5d518f42013-08-02 14:04:32 -070070
Raul E Rangel52ca2e82018-07-03 14:10:14 -060071 DEFAULT_PORT = int(os.getenv('SERVOD_PORT', '9999'))
Richard Barnette9a26ad62016-06-10 12:03:08 -070072
Dan Shie5b3c512014-08-21 12:12:09 -070073 # Timeout for initializing servo signals.
Wai-Hong Tam37b6ed32017-09-19 15:52:39 -070074 INITIALIZE_SERVO_TIMEOUT_SECS = 60
Richard Barnette9a26ad62016-06-10 12:03:08 -070075
xixuan6cf6d2f2016-01-29 15:29:00 -080076 # Ready test function
77 SERVO_READY_METHOD = 'get_version'
Fang Deng5d518f42013-08-02 14:04:32 -070078
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -080079 # Directory prefix on the servo host where the servod logs are stored.
80 SERVOD_LOG_PREFIX = '/var/log/servod'
81
82 # Exit code to use when symlinks for servod logs are not found.
83 NO_SYMLINKS_CODE = 9
84
85 # Directory in the job's results directory to dump the logs into.
86 LOG_DIR = 'servod'
87
88 # Prefix for joint loglevel files in the logs.
89 JOINT_LOG_PREFIX = 'log'
90
91 # Regex group to extract timestamp from logfile name.
92 TS_GROUP = 'ts'
93
94 # This regex is used to extract the timestamp from servod logs.
95 # files always start with log.
96 TS_RE = (r'log.'
97 # The timestamp is of format %Y-%m-%d--%H-%M-%S.MS
98 r'(?P<%s>\d{4}(\-\d{2}){2}\-(-\d{2}){3}.\d{3})'
99 # The loglevel is optional depending on labstation version.
100 r'(.(INFO|DEBUG|WARNING))?' % TS_GROUP)
101 TS_EXTRACTOR = re.compile(TS_RE)
102
103 # Regex group to extract MCU name from logline in servod logs.
104 MCU_GROUP = 'mcu'
105
106 # Regex group to extract logline from MCU logline in servod logs.
107 LINE_GROUP = 'line'
108
109 # This regex is used to extract the mcu and the line content from an
110 # MCU logline in servod logs. e.g. EC or servo_v4 console logs.
111 # Here is an example log-line:
112 #
113 # 2020-01-23 13:15:12,223 - servo_v4 - EC3PO.Console - DEBUG -
114 # console.py:219:LogConsoleOutput - /dev/pts/9 - cc polarity: cc1
115 #
116 # Here is conceptually how they are formatted:
117 #
118 # <time> - <MCU> - EC3PO.Console - <LVL> - <file:line:func> - <pts> -
119 # <output>
120 #
121 # The log format starts with a timestamp
122 MCU_RE = (r'[\d\-]+ [\d:,]+ '
123 # The mcu that is logging this is next.
124 r'- (?P<%s>\w+) - '
125 # Next, we have more log outputs before the actual line.
126 # Information about the file line, logging function etc.
127 # Anchor on EC3PO Console, LogConsoleOutput and dev/pts.
128 # NOTE: if the log format changes, this regex needs to be
129 # adjusted.
130 r'EC3PO\.Console[\s\-\w\d:.]+LogConsoleOutput - /dev/pts/\d+ - '
131 # Lastly, we get the MCU's console line.
132 r'(?P<%s>.+$)' % (MCU_GROUP, LINE_GROUP))
133 MCU_EXTRACTOR = re.compile(MCU_RE)
134
135 # Suffix to identify compressed logfiles.
136 COMPRESSION_SUFFIX = '.tbz2'
137
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800138 def _init_attributes(self):
139 self._servo_state = None
140 self.servo_port = None
141 self.servo_board = None
142 self.servo_model = None
143 self.servo_serial = None
144 self._servo = None
145 self._servod_server_proxy = None
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800146 # Flag to make sure that multiple calls to close do not result in the
147 # logic executing multiple times.
148 self._closed = False
Fang Deng5d518f42013-08-02 14:04:32 -0700149
Richard Barnette17bfc6c2016-08-04 18:41:43 -0700150 def _initialize(self, servo_host='localhost',
Richard Barnettee519dcd2016-08-15 17:37:17 -0700151 servo_port=DEFAULT_PORT, servo_board=None,
Nick Sanders2f3c9852018-10-24 12:10:24 -0700152 servo_model=None, servo_serial=None, is_in_lab=None,
153 *args, **dargs):
Fang Deng5d518f42013-08-02 14:04:32 -0700154 """Initialize a ServoHost instance.
155
156 A ServoHost instance represents a host that controls a servo.
157
158 @param servo_host: Name of the host where the servod process
159 is running.
Raul E Rangel52ca2e82018-07-03 14:10:14 -0600160 @param servo_port: Port the servod process is listening on. Defaults
161 to the SERVOD_PORT environment variable if set,
162 otherwise 9999.
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700163 @param servo_board: Board that the servo is connected to.
Nick Sanders2f3c9852018-10-24 12:10:24 -0700164 @param servo_model: Model that the servo is connected to.
Dan Shi4d478522014-02-14 13:46:32 -0800165 @param is_in_lab: True if the servo host is in Cros Lab. Default is set
166 to None, for which utils.host_is_in_lab_zone will be
167 called to check if the servo host is in Cros lab.
Fang Deng5d518f42013-08-02 14:04:32 -0700168
169 """
170 super(ServoHost, self)._initialize(hostname=servo_host,
Garry Wangebc015b2019-06-06 17:45:06 -0700171 is_in_lab=is_in_lab, *args, **dargs)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800172 self._init_attributes()
Richard Barnette42f4db92018-08-23 15:05:15 -0700173 self.servo_port = int(servo_port)
Richard Barnettee519dcd2016-08-15 17:37:17 -0700174 self.servo_board = servo_board
Nick Sanders2f3c9852018-10-24 12:10:24 -0700175 self.servo_model = servo_model
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700176 self.servo_serial = servo_serial
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800177
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800178 # The location of the log files on the servo host for this instance.
179 self.remote_log_dir = '%s_%s' % (self.SERVOD_LOG_PREFIX,
180 self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700181 # Path of the servo host lock file.
182 self._lock_file = (self.TEMP_FILE_DIR + str(self.servo_port)
183 + self.LOCK_FILE_POSTFIX)
184 # File path to declare a reboot request.
185 self._reboot_file = (self.TEMP_FILE_DIR + str(self.servo_port)
186 + self.REBOOT_FILE_POSTFIX)
187
188 # Lock the servo host if it's an in-lab labstation to prevent other
189 # task to reboot it until current task completes. We also wait and
190 # make sure the labstation is up here, in the case of the labstation is
191 # in the middle of reboot.
Garry Wang7c00b0f2019-06-25 17:28:17 -0700192 self._is_locked = False
Garry Wang42b4d862019-06-25 15:50:49 -0700193 if (self.wait_up(self.REBOOT_TIMEOUT) and self.is_in_lab()
194 and self.is_labstation()):
Garry Wang79e9af62019-06-12 15:19:19 -0700195 self._lock()
Garry Wangebc015b2019-06-06 17:45:06 -0700196
Richard Barnette9a26ad62016-06-10 12:03:08 -0700197 self._repair_strategy = (
198 servo_repair.create_servo_repair_strategy())
Richard Barnettee519dcd2016-08-15 17:37:17 -0700199
Richard Barnette9a26ad62016-06-10 12:03:08 -0700200 def connect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700201 """Establish a connection to the servod server on this host.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700202
203 Initializes `self._servo` and then verifies that all network
204 connections are working. This will create an ssh tunnel if
205 it's required.
206
207 As a side effect of testing the connection, all signals on the
208 target servo are reset to default values, and the USB stick is
209 set to the neutral (off) position.
210 """
Kevin Cheng643ce8a2016-09-15 15:42:12 -0700211 servo_obj = servo.Servo(servo_host=self, servo_serial=self.servo_serial)
Kuang-che Wu05763f52019-08-30 16:48:21 +0800212 self._servo = servo_obj
Richard Barnette9a26ad62016-06-10 12:03:08 -0700213 timeout, _ = retry.timeout(
214 servo_obj.initialize_dut,
215 timeout_sec=self.INITIALIZE_SERVO_TIMEOUT_SECS)
216 if timeout:
217 raise hosts.AutoservVerifyError(
218 'Servo initialize timed out.')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700219
220
221 def disconnect_servo(self):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700222 """Disconnect our servo if it exists.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700223
224 If we've previously successfully connected to our servo,
225 disconnect any established ssh tunnel, and set `self._servo`
226 back to `None`.
227 """
228 if self._servo:
229 # N.B. This call is safe even without a tunnel:
230 # rpc_server_tracker.disconnect() silently ignores
231 # unknown ports.
232 self.rpc_server_tracker.disconnect(self.servo_port)
233 self._servo = None
Fang Deng5d518f42013-08-02 14:04:32 -0700234
Garry Wangc1288cf2019-12-17 14:58:00 -0800235
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800236 def _create_servod_server_proxy(self):
237 """Create a proxy that can be used to communicate with servod server.
Fang Deng5d518f42013-08-02 14:04:32 -0700238
239 @returns: An xmlrpclib.ServerProxy that is connected to the servod
240 server on the host.
Fang Deng5d518f42013-08-02 14:04:32 -0700241 """
Richard Barnette9a26ad62016-06-10 12:03:08 -0700242 if ENABLE_SSH_TUNNEL_FOR_SERVO and not self.is_localhost():
243 return self.rpc_server_tracker.xmlrpc_connect(
244 None, self.servo_port,
245 ready_test_name=self.SERVO_READY_METHOD,
Allen Li2b1a8992018-11-27 14:17:18 -0800246 timeout_seconds=60,
Allen Li556f4532018-12-03 18:11:23 -0800247 request_timeout_seconds=3600)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700248 else:
249 remote = 'http://%s:%s' % (self.hostname, self.servo_port)
250 return xmlrpclib.ServerProxy(remote)
Fang Deng5d518f42013-08-02 14:04:32 -0700251
252
Wai-Hong Tam3a8a2552019-11-19 14:28:04 +0800253 def get_servod_server_proxy(self):
254 """Return a cached proxy if exists; otherwise, create a new one.
255
256 @returns: An xmlrpclib.ServerProxy that is connected to the servod
257 server on the host.
258 """
259 # Single-threaded execution, no race
260 if self._servod_server_proxy is None:
261 self._servod_server_proxy = self._create_servod_server_proxy()
262 return self._servod_server_proxy
263
264
Richard Barnette1edbb162016-11-01 11:47:50 -0700265 def verify(self, silent=False):
266 """Update the servo host and verify it's in a good state.
267
268 @param silent If true, suppress logging in `status.log`.
269 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700270 message = 'Beginning verify for servo host %s port %s serial %s'
271 message %= (self.hostname, self.servo_port, self.servo_serial)
272 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700273 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700274 self._repair_strategy.verify(self, silent)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800275 self._servo_state = SERVO_STATE_WORKING
276 self.record('INFO', None, None, 'ServoHost verify set servo_state as WORKING')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700277 except:
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800278 self._servo_state = SERVO_STATE_BROKEN
279 self.record('INFO', None, None, 'ServoHost verify set servo_state as BROKEN')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700280 self.disconnect_servo()
Garry Wangc1288cf2019-12-17 14:58:00 -0800281 self.stop_servod()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700282 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700283
284
Richard Barnette1edbb162016-11-01 11:47:50 -0700285 def repair(self, silent=False):
286 """Attempt to repair servo host.
287
288 @param silent If true, suppress logging in `status.log`.
289 """
Richard Barnetteabbdc252018-07-26 16:57:42 -0700290 message = 'Beginning repair for servo host %s port %s serial %s'
291 message %= (self.hostname, self.servo_port, self.servo_serial)
292 self.record('INFO', None, None, message)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700293 try:
Richard Barnette1edbb162016-11-01 11:47:50 -0700294 self._repair_strategy.repair(self, silent)
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800295 self._servo_state = SERVO_STATE_WORKING
296 self.record('INFO', None, None, 'ServoHost repair set servo_state as WORKING')
Garry Wang464ff1e2019-07-18 17:20:34 -0700297 # If target is a labstation then try to withdraw any existing
298 # reboot request created by this servo because it passed repair.
299 if self.is_labstation():
300 self.withdraw_reboot_request()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700301 except:
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800302 self._servo_state = SERVO_STATE_BROKEN
303 self.record('INFO', None, None, 'ServoHost repair set servo_state as BROKEN')
Richard Barnette9a26ad62016-06-10 12:03:08 -0700304 self.disconnect_servo()
Garry Wangc1288cf2019-12-17 14:58:00 -0800305 self.stop_servod()
Richard Barnette9a26ad62016-06-10 12:03:08 -0700306 raise
Fang Deng5d518f42013-08-02 14:04:32 -0700307
308
Dan Shi4d478522014-02-14 13:46:32 -0800309 def get_servo(self):
310 """Get the cached servo.Servo object.
Fang Deng5d518f42013-08-02 14:04:32 -0700311
Dan Shi4d478522014-02-14 13:46:32 -0800312 @return: a servo.Servo object.
Dana Goyette353d1d92019-06-27 10:43:59 -0700313 @rtype: autotest_lib.server.cros.servo.servo.Servo
Fang Deng5d518f42013-08-02 14:04:32 -0700314 """
Dan Shi4d478522014-02-14 13:46:32 -0800315 return self._servo
316
317
Garry Wang79e9af62019-06-12 15:19:19 -0700318 def request_reboot(self):
319 """Request servohost to be rebooted when it's safe to by touch a file.
320 """
321 logging.debug('Request to reboot servohost %s has been created by '
Garry Wang464ff1e2019-07-18 17:20:34 -0700322 'servo with port # %s', self.hostname, self.servo_port)
Garry Wang79e9af62019-06-12 15:19:19 -0700323 self.run('touch %s' % self._reboot_file, ignore_status=True)
324
325
Garry Wang464ff1e2019-07-18 17:20:34 -0700326 def withdraw_reboot_request(self):
327 """Withdraw a servohost reboot request if exists by remove the flag
328 file.
329 """
330 logging.debug('Withdrawing request to reboot servohost %s that created'
331 ' by servo with port # %s if exists.',
332 self.hostname, self.servo_port)
333 self.run('rm -f %s' % self._reboot_file, ignore_status=True)
334
335
Garry Wangc1288cf2019-12-17 14:58:00 -0800336 def start_servod(self, quick_startup=False):
337 """Start the servod process on servohost.
338 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800339 # Skip if running on the localhost.(crbug.com/1038168)
340 if self.is_localhost():
341 logging.debug("Servohost is a localhost, skipping start servod.")
342 return
343
344 cmd = 'start servod'
Garry Wangc1288cf2019-12-17 14:58:00 -0800345 if self.servo_board:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800346 cmd += ' BOARD=%s' % self.servo_board
Garry Wangc1288cf2019-12-17 14:58:00 -0800347 if self.servo_model:
348 cmd += ' MODEL=%s' % self.servo_model
Garry Wangc1288cf2019-12-17 14:58:00 -0800349 else:
Garry Wang2ac15ee2019-12-30 19:03:02 -0800350 logging.warning('Board for DUT is unknown; starting servod'
351 ' assuming a pre-configured board.')
352
353 cmd += ' PORT=%d' % self.servo_port
354 if self.servo_serial:
355 cmd += ' SERIAL=%s' % self.servo_serial
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800356 # Remove the symbolic links from the logs. This helps ensure that
357 # a failed servod instantiation does not cause us to grab old logs
358 # by mistake.
359 self.remove_latest_log_symlinks()
Garry Wangcdd27b22020-01-13 14:59:11 -0800360 self.run(cmd, timeout=60)
Garry Wangc1288cf2019-12-17 14:58:00 -0800361
362 # There's a lag between when `start servod` completes and when
363 # the _ServodConnectionVerifier trigger can actually succeed.
364 # The call to time.sleep() below gives time to make sure that
365 # the trigger won't fail after we return.
366
367 # Normally servod on servo_v3 and labstation take ~10 seconds to ready,
368 # But in the rare case all servo on a labstation are in heavy use they
369 # may take ~30 seconds. So the timeout value will double these value,
370 # and we'll try quick start up when first time initialize servohost,
371 # and use standard start up timeout in repair.
372 if quick_startup:
373 timeout = SERVOD_QUICK_STARTUP_TIMEOUT
374 else:
375 timeout = SERVOD_STARTUP_TIMEOUT
376 logging.debug('Wait %s seconds for servod process fully up.', timeout)
377 time.sleep(timeout)
378
379
380 def stop_servod(self):
381 """Stop the servod process on servohost.
382 """
Garry Wang2ac15ee2019-12-30 19:03:02 -0800383 # Skip if running on the localhost.(crbug.com/1038168)
384 if self.is_localhost():
385 logging.debug("Servohost is a localhost, skipping stop servod.")
386 return
387
Garry Wangc1288cf2019-12-17 14:58:00 -0800388 logging.debug('Stopping servod on port %s', self.servo_port)
Garry Wangcdd27b22020-01-13 14:59:11 -0800389 self.run('stop servod PORT=%d' % self.servo_port,
390 timeout=60, ignore_status=True)
Garry Wangc1288cf2019-12-17 14:58:00 -0800391 logging.debug('Wait %s seconds for servod process fully teardown.',
392 SERVOD_TEARDOWN_TIMEOUT)
393 time.sleep(SERVOD_TEARDOWN_TIMEOUT)
394
395
396 def restart_servod(self, quick_startup=False):
397 """Restart the servod process on servohost.
398 """
399 self.stop_servod()
400 self.start_servod(quick_startup)
401
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800402 def _extract_compressed_logs(self, logdir, relevant_files):
403 """Decompress servod logs in |logdir|.
404
405 @param logdir: directory containing compressed servod logs.
406 @param relevant_files: list of files in |logdir| to consider.
407
408 @returns: tuple, (tarfiles, files) where
409 tarfiles: list of the compressed filenames that have been
410 extracted and deleted
411 files: list of the uncompressed files that were generated
412 """
413 # For all tar-files, first extract them to the directory, and
414 # then let the common flow handle them.
415 tarfiles = [cf for cf in relevant_files if
416 cf.endswith(self.COMPRESSION_SUFFIX)]
417 files = []
418 for f in tarfiles:
419 norm_name = os.path.basename(f)[:-len(self.COMPRESSION_SUFFIX)]
420 with tarfile.open(f) as tf:
421 # Each tarfile has only one member, as
422 # that's the compressed log.
423 member = tf.members[0]
424 # Manipulate so that it only extracts the basename, and not
425 # the directories etc.
426 member.name = norm_name
427 files.append(os.path.join(logdir, member.name))
428 tf.extract(member, logdir)
429 # File has been extracted: remove the compressed file.
430 os.remove(f)
431 return tarfiles, files
432
433 def _extract_mcu_logs(self, log_subdir):
434 """Extract MCU (EC, Cr50, etc) console output from servod debug logs.
435
436 Using the MCU_EXTRACTOR regex (above) extract and split out MCU console
437 lines from the logs to generate invidiual console logs e.g. after
438 this method, you can find an ec.txt and servo_v4.txt in |log_dir| if
439 those MCUs had any console input/output.
440
441 @param log_subdir: directory with log.DEBUG.txt main servod debug logs.
442 """
443 # Extract the MCU for each one. The MCU logs are only in the .DEBUG
444 # files
445 mcu_lines_file = os.path.join(log_subdir, 'log.DEBUG.txt')
446 if not os.path.exists(mcu_lines_file):
447 logging.info('No DEBUG logs found to extract MCU logs from.')
448 return
449 mcu_files = {}
450 mcu_file_template = '%s.txt'
451 with open(mcu_lines_file, 'r') as f:
452 for line in f:
453 match = self.MCU_EXTRACTOR.match(line)
454 if match:
455 mcu = match.group(self.MCU_GROUP).lower()
456 line = match.group(self.LINE_GROUP)
457 if mcu not in mcu_files:
458 mcu_file = os.path.join(log_subdir,
459 mcu_file_template % mcu)
460 mcu_files[mcu] = open(mcu_file, 'a')
461 fd = mcu_files[mcu]
462 fd.write(line + '\n')
463 for f in mcu_files:
464 mcu_files[f].close()
465
466
467 def remove_latest_log_symlinks(self):
468 """Remove the conveninence symlinks 'latest' servod logs."""
469 symlink_wildcard = '%s/latest*' % self.remote_log_dir
470 cmd = 'rm ' + symlink_wildcard
471 self.run(cmd, stderr_tee=None, ignore_status=True)
472
473 def grab_logs(self, outdir):
474 """Retrieve logs from servo_host to |outdir|/servod_{port}.{ts}/.
475
476 This method first collects all logs on the servo_host side pertaining
477 to this servod instance (port, instatiation). It glues them together
478 into combined log.[level].txt files and extracts all available MCU
479 console I/O from the logs into individual files e.g. servo_v4.txt
480
481 All the output can be found in a directory inside |outdir| that
482 this generates based on |LOG_DIR|, the servod port, and the instance
483 timestamp on the servo_host side.
484
485 @param outdir: directory to create a subdirectory into to place the
486 servod logs into.
487 """
488 # First, extract the timestamp. This cmd gives the real filename of
489 # the latest aka current log file.
490 cmd = ('if [ -f %(dir)s/latest.DEBUG ];'
491 'then realpath %(dir)s/latest.DEBUG;'
492 'elif [ -f %(dir)s/latest ];'
493 'then realpath %(dir)s/latest;'
494 'else exit %(code)d;'
495 'fi' % {'dir': self.remote_log_dir,
496 'code': self.NO_SYMLINKS_CODE})
497 res = self.run(cmd, stderr_tee=None, ignore_status=True)
498 if res.exit_status != 0:
499 if res.exit_status == self.NO_SYMLINKS_CODE:
500 logging.warning('servod log latest symlinks not found. '
501 'This is likely due to an error starting up '
502 'servod. Ignoring..')
503 else:
504 logging.warning('Failed to find servod logs on servo host.')
505 logging.warning(res.stderr.strip())
506 return
507 fname = os.path.basename(res.stdout.strip())
508 # From the fname, ought to extract the timestamp using the TS_EXTRACTOR
509 instance_ts = self.TS_EXTRACTOR.match(fname).group(self.TS_GROUP)
510 # Create the local results log dir.
511 log_dir = os.path.join(outdir, '%s_%s.%s' % (self.LOG_DIR,
512 str(self.servo_port),
513 instance_ts))
514 logging.info('Saving servod logs to %s.', log_dir)
515 os.mkdir(log_dir)
516 # Now, get all files with that timestamp.
517 cmd = 'find %s -maxdepth 1 -name "log.%s*"' % (self.remote_log_dir,
518 instance_ts)
519 res = self.run(cmd, stderr_tee=None, ignore_status=True)
520 files = res.stdout.strip().split()
521 try:
522 self.get_file(files, log_dir, try_rsync=False)
523
524 except error.AutoservRunError as e:
525 result = e.result_obj
526 if result.exit_status != 0:
527 stderr = result.stderr.strip()
528 logging.warning("Couldn't retrieve servod logs. Ignoring: %s",
529 stderr or '\n%s' % result)
530 return
531 local_files = [os.path.join(log_dir, f) for f in os.listdir(log_dir)]
532 # TODO(crrev.com/c/1793030): remove no-level case once CL is pushed
533 for level_name in ('DEBUG', 'INFO', 'WARNING', ''):
534 # Create the joint files for each loglevel. i.e log.DEBUG
535 joint_file = self.JOINT_LOG_PREFIX
536 if level_name:
537 joint_file = '%s.%s' % (self.JOINT_LOG_PREFIX, level_name)
538 # This helps with some online tools to avoid complaints about an
539 # unknown filetype.
540 joint_file = joint_file + '.txt'
541 joint_path = os.path.join(log_dir, joint_file)
542 files = [f for f in local_files if level_name in f]
543 if not files:
544 # TODO(crrev.com/c/1793030): remove no-level case once CL
545 # is pushed
546 continue
547 # Extract compressed logs if any.
548 compressed, extracted = self._extract_compressed_logs(log_dir,
549 files)
550 files = list(set(files) - set(compressed))
551 files.extend(extracted)
552 # Need to sort. As they all share the same timestamp, and
553 # loglevel, the index itself is sufficient. The highest index
554 # is the oldest file, therefore we need a descending sort.
555 def sortkey(f, level=level_name):
556 """Custom sortkey to sort based on rotation number int."""
557 if f.endswith(level_name): return 0
558 return int(f.split('.')[-1])
559
560 files.sort(reverse=True, key=sortkey)
561 # Just rename the first file rather than building from scratch.
562 os.rename(files[0], joint_path)
563 with open(joint_path, 'a') as joint_f:
564 for logfile in files[1:]:
565 # Transfer the file to the joint file line by line.
566 with open(logfile, 'r') as log_f:
567 for line in log_f:
568 joint_f.write(line)
569 # File has been written over. Delete safely.
570 os.remove(logfile)
571 # Need to remove all files form |local_files| so we don't
572 # analyze them again.
573 local_files = list(set(local_files) - set(files) - set(compressed))
574 # Lastly, extract MCU logs from the joint logs.
575 self._extract_mcu_logs(log_dir)
576
Garry Wangc1288cf2019-12-17 14:58:00 -0800577
Garry Wang79e9af62019-06-12 15:19:19 -0700578 def _lock(self):
579 """lock servohost by touching a file.
580 """
581 logging.debug('Locking servohost %s by touching %s file',
582 self.hostname, self._lock_file)
583 self.run('touch %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700584 self._is_locked = True
Garry Wang79e9af62019-06-12 15:19:19 -0700585
586
587 def _unlock(self):
588 """Unlock servohost by removing the lock file.
589 """
590 logging.debug('Unlocking servohost by removing %s file',
591 self._lock_file)
592 self.run('rm %s' % self._lock_file, ignore_status=True)
Garry Wang7c00b0f2019-06-25 17:28:17 -0700593 self._is_locked = False
Garry Wang79e9af62019-06-12 15:19:19 -0700594
595
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700596 def close(self):
Congbin Guofc3b8962019-03-22 17:38:46 -0700597 """Close the associated servo and the host object."""
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800598 if self._closed:
599 logging.debug('ServoHost is already closed.')
600 return
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700601 if self._servo:
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800602 outdir = None if not self.job else self.job.resultdir
Congbin Guo2e5e2a22018-07-27 10:32:48 -0700603 # In some cases when we run as lab-tools, the job object is None.
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800604 self._servo.close(outdir)
605
606 if self.job and not self.is_localhost():
607 # Grab all logs from this servod instance before stopping servod.
608 # TODO(crbug.com/1011516): once enabled, remove the check against
609 # localhost and instead check against log-rotiation enablement.
610 try:
611 self.grab_logs(self.job.resultdir)
612 except error.AutoservRunError as e:
613 logging.info('Failed to grab servo logs due to: %s. '
614 'This error is forgiven.', str(e))
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700615
Garry Wang7c00b0f2019-06-25 17:28:17 -0700616 if self._is_locked:
617 # Remove the lock if the servohost has been locked.
Garry Wang79e9af62019-06-12 15:19:19 -0700618 try:
619 self._unlock()
620 except error.AutoservSSHTimeout:
621 logging.error('Unlock servohost failed due to ssh timeout.'
622 ' It may caused by servohost went down during'
623 ' the task.')
Garry Wangc1288cf2019-12-17 14:58:00 -0800624 # We want always stop servod after task to minimum the impact of bad
625 # servod process interfere other servods.(see crbug.com/1028665)
Garry Wang4c624bc2020-01-27 16:34:43 -0800626 try:
627 self.stop_servod()
628 except error.AutoservRunError as e:
629 logging.info("Failed to stop servod due to:\n%s\n"
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800630 "This error is forgiven.", str(e))
Garry Wangc1288cf2019-12-17 14:58:00 -0800631
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700632 super(ServoHost, self).close()
Ruben Rodriguez Buchillon93084d02020-01-21 15:17:36 -0800633 # Mark closed.
634 self._closed = True
Congbin Guoa1f9cba2018-07-03 11:36:59 -0700635
636
Otabek Kasimovcc9738e2020-02-14 16:17:15 -0800637 def get_servo_state(self):
638 return SERVO_STATE_BROKEN if self._servo_state is None else self._servo_state
639
640
Richard Barnetteea3e4602016-06-10 12:36:41 -0700641def make_servo_hostname(dut_hostname):
642 """Given a DUT's hostname, return the hostname of its servo.
643
644 @param dut_hostname: hostname of a DUT.
645
646 @return hostname of the DUT's servo.
647
648 """
649 host_parts = dut_hostname.split('.')
650 host_parts[0] = host_parts[0] + '-servo'
651 return '.'.join(host_parts)
652
653
654def servo_host_is_up(servo_hostname):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700655 """Given a servo host name, return if it's up or not.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700656
657 @param servo_hostname: hostname of the servo host.
658
659 @return True if it's up, False otherwise
660 """
661 # Technically, this duplicates the SSH ping done early in the servo
662 # proxy initialization code. However, this ping ends in a couple
663 # seconds when if fails, rather than the 60 seconds it takes to decide
664 # that an SSH ping has timed out. Specifically, that timeout happens
665 # when our servo DNS name resolves, but there is no host at that IP.
666 logging.info('Pinging servo host at %s', servo_hostname)
667 ping_config = ping_runner.PingConfig(
668 servo_hostname, count=3,
669 ignore_result=True, ignore_status=True)
670 return ping_runner.PingRunner().ping(ping_config).received > 0
671
672
Richard Barnettee519dcd2016-08-15 17:37:17 -0700673def _map_afe_board_to_servo_board(afe_board):
674 """Map a board we get from the AFE to a servo appropriate value.
675
676 Many boards are identical to other boards for servo's purposes.
677 This function makes that mapping.
678
679 @param afe_board string board name received from AFE.
680 @return board we expect servo to have.
681
682 """
683 KNOWN_SUFFIXES = ['-freon', '_freon', '_moblab', '-cheets']
684 BOARD_MAP = {'gizmo': 'panther'}
685 mapped_board = afe_board
686 if afe_board in BOARD_MAP:
687 mapped_board = BOARD_MAP[afe_board]
688 else:
689 for suffix in KNOWN_SUFFIXES:
690 if afe_board.endswith(suffix):
691 mapped_board = afe_board[0:-len(suffix)]
692 break
693 if mapped_board != afe_board:
694 logging.info('Mapping AFE board=%s to %s', afe_board, mapped_board)
695 return mapped_board
696
697
Prathmesh Prabhub4810232018-09-07 13:24:08 -0700698def get_servo_args_for_host(dut_host):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700699 """Return servo data associated with a given DUT.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700700
Richard Barnetteea3e4602016-06-10 12:36:41 -0700701 @param dut_host Instance of `Host` on which to find the servo
702 attributes.
Prathmesh Prabhuf605dd32018-08-28 17:09:04 -0700703 @return `servo_args` dict with host and an optional port.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700704 """
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700705 info = dut_host.host_info_store.get()
706 servo_args = {k: v for k, v in info.attributes.iteritems()
707 if k in SERVO_ATTR_KEYS}
Richard Barnetteea3e4602016-06-10 12:36:41 -0700708
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700709 if SERVO_PORT_ATTR in servo_args:
710 try:
711 servo_args[SERVO_PORT_ATTR] = int(servo_args[SERVO_PORT_ATTR])
712 except ValueError:
713 logging.error('servo port is not an int: %s',
714 servo_args[SERVO_PORT_ATTR])
715 # Reset servo_args because we don't want to use an invalid port.
716 servo_args.pop(SERVO_HOST_ATTR, None)
717
718 if info.board:
719 servo_args[SERVO_BOARD_ATTR] = _map_afe_board_to_servo_board(info.board)
Nick Sanders2f3c9852018-10-24 12:10:24 -0700720 if info.model:
721 servo_args[SERVO_MODEL_ATTR] = info.model
Prathmesh Prabhu6f5f6362018-09-05 17:20:31 -0700722 return servo_args if SERVO_HOST_ATTR in servo_args else None
Richard Barnetteea3e4602016-06-10 12:36:41 -0700723
724
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700725def _tweak_args_for_ssp_moblab(servo_args):
726 if servo_args[SERVO_HOST_ATTR] in ['localhost', '127.0.0.1']:
727 servo_args[SERVO_HOST_ATTR] = _CONFIG.get_config_value(
728 'SSP', 'host_container_ip', type=str, default=None)
729
730
Dan Shi023aae32016-05-25 11:13:01 -0700731def create_servo_host(dut, servo_args, try_lab_servo=False,
Gregory Nisbetde13e2a2019-12-09 22:44:00 -0800732 try_servo_repair=False, dut_host_info=None):
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700733 """Create a ServoHost object for a given DUT, if appropriate.
Dan Shi4d478522014-02-14 13:46:32 -0800734
Richard Barnette9a26ad62016-06-10 12:03:08 -0700735 This function attempts to create and verify or repair a `ServoHost`
736 object for a servo connected to the given `dut`, subject to various
737 constraints imposed by the parameters:
738 * When the `servo_args` parameter is not `None`, a servo
739 host must be created, and must be checked with `repair()`.
740 * Otherwise, if a servo exists in the lab and `try_lab_servo` is
741 true:
742 * If `try_servo_repair` is true, then create a servo host and
743 check it with `repair()`.
744 * Otherwise, if the servo responds to `ping` then create a
745 servo host and check it with `verify()`.
Fang Denge545abb2014-12-30 18:43:47 -0800746
Richard Barnette9a26ad62016-06-10 12:03:08 -0700747 In cases where `servo_args` was not `None`, repair failure
748 exceptions are passed back to the caller; otherwise, exceptions
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700749 are logged and then discarded. Note that this only happens in cases
750 where we're called from a test (not special task) control file that
751 has an explicit dependency on servo. In that case, we require that
752 repair not write to `status.log`, so as to avoid polluting test
753 results.
754
755 TODO(jrbarnette): The special handling for servo in test control
756 files is a thorn in my flesh; I dearly hope to see it cut out before
757 my retirement.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700758
759 Parameters for a servo host consist of a host name, port number, and
760 DUT board, and are determined from one of these sources, in order of
761 priority:
Richard Barnetteea3e4602016-06-10 12:36:41 -0700762 * Servo attributes from the `dut` parameter take precedence over
763 all other sources of information.
764 * If a DNS entry for the servo based on the DUT hostname exists in
765 the CrOS lab network, that hostname is used with the default
Richard Barnette9a26ad62016-06-10 12:03:08 -0700766 port and the DUT's board.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700767 * If no other options are found, the parameters will be taken
Richard Barnette9a26ad62016-06-10 12:03:08 -0700768 from the `servo_args` dict passed in from the caller.
Richard Barnetteea3e4602016-06-10 12:36:41 -0700769
770 @param dut An instance of `Host` from which to take
771 servo parameters (if available).
772 @param servo_args A dictionary with servo parameters to use if
773 they can't be found from `dut`. If this
774 argument is supplied, unrepaired exceptions
775 from `verify()` will be passed back to the
776 caller.
777 @param try_lab_servo If not true, servo host creation will be
778 skipped unless otherwise required by the
779 caller.
Richard Barnette9a26ad62016-06-10 12:03:08 -0700780 @param try_servo_repair If true, check a servo host with
781 `repair()` instead of `verify()`.
Dan Shi4d478522014-02-14 13:46:32 -0800782
783 @returns: A ServoHost object or None. See comments above.
784
785 """
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700786 servo_dependency = servo_args is not None
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700787 if dut is not None and (try_lab_servo or servo_dependency):
Prathmesh Prabhub4810232018-09-07 13:24:08 -0700788 servo_args_override = get_servo_args_for_host(dut)
Richard Barnetteea3e4602016-06-10 12:36:41 -0700789 if servo_args_override is not None:
Prathmesh Prabhuefb1b482018-08-28 17:15:05 -0700790 if utils.in_moblab_ssp():
791 _tweak_args_for_ssp_moblab(servo_args_override)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700792 logging.debug(
793 'Overriding provided servo_args (%s) with arguments'
794 ' determined from the host (%s)',
795 servo_args,
796 servo_args_override,
797 )
Richard Barnetteea3e4602016-06-10 12:36:41 -0700798 servo_args = servo_args_override
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700799
Richard Barnetteea3e4602016-06-10 12:36:41 -0700800 if servo_args is None:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700801 logging.debug('No servo_args provided, and failed to find overrides.')
Richard Barnetteea3e4602016-06-10 12:36:41 -0700802 return None
Prathmesh Prabhucba44292018-08-28 17:44:45 -0700803 if SERVO_HOST_ATTR not in servo_args:
804 logging.debug('%s attribute missing from servo_args: %s',
805 SERVO_HOST_ATTR, servo_args)
806 return None
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700807 if (not servo_dependency and not try_servo_repair and
Richard Barnette9a26ad62016-06-10 12:03:08 -0700808 not servo_host_is_up(servo_args[SERVO_HOST_ATTR])):
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700809 logging.debug('ServoHost is not up.')
Dan Shibbb0cb62014-03-24 17:50:57 -0700810 return None
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700811
Garry Wangebc015b2019-06-06 17:45:06 -0700812 newhost = ServoHost(**servo_args)
Garry Wangcdd27b22020-01-13 14:59:11 -0800813 try:
814 newhost.restart_servod(quick_startup=True)
815 except error.AutoservSSHTimeout:
816 logging.warning("Restart servod failed due ssh connection "
817 "to servohost timed out. This error is forgiven"
818 " here, we will retry in servo repair process.")
819 except error.AutoservRunError as e:
820 logging.warning("Restart servod failed due to:\n%s\n"
821 "This error is forgiven here, we will retry"
822 " in servo repair process.", str(e))
Garry Wangebc015b2019-06-06 17:45:06 -0700823
Gregory Nisbetde13e2a2019-12-09 22:44:00 -0800824 # TODO(gregorynisbet): Clean all of this up.
825 logging.debug('create_servo_host: attempt to set info store on '
826 'servo host')
827 try:
828 if dut_host_info is None:
829 logging.debug('create_servo_host: dut_host_info is '
830 'None, skipping')
831 else:
832 newhost.set_dut_host_info(dut_host_info)
833 logging.debug('create_servo_host: successfully set info '
834 'store')
835 except Exception:
836 logging.error("create_servo_host: (%s)", traceback.format_exc())
837
Richard Barnette9a26ad62016-06-10 12:03:08 -0700838 # Note that the logic of repair() includes everything done
839 # by verify(). It's sufficient to call one or the other;
840 # we don't need both.
Richard Barnette07c2e1d2016-10-26 14:24:28 -0700841 if servo_dependency:
842 newhost.repair(silent=True)
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700843 return newhost
844
845 if try_servo_repair:
846 try:
847 newhost.repair()
848 except Exception:
849 logging.exception('servo repair failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700850 else:
851 try:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700852 newhost.verify()
Kevin Cheng5f2ba6c2016-09-28 10:20:05 -0700853 except Exception:
Prathmesh Prabhu88bf6052018-08-28 16:21:26 -0700854 logging.exception('servo verify failed for %s', newhost.hostname)
Richard Barnette9a26ad62016-06-10 12:03:08 -0700855 return newhost