Congbin Guo | 3afae6c | 2019-08-13 16:29:42 -0700 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # Copyright 2019 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | """A cherrypy application to check devserver health status.""" |
| 6 | |
| 7 | from __future__ import absolute_import |
| 8 | from __future__ import division |
| 9 | from __future__ import print_function |
| 10 | |
| 11 | import json |
| 12 | import os |
| 13 | import subprocess |
| 14 | import threading |
| 15 | import time |
| 16 | |
Amin Hassani | d4e3539 | 2019-10-03 11:02:44 -0700 | [diff] [blame] | 17 | import cherrypy # pylint: disable=import-error |
Congbin Guo | 3afae6c | 2019-08-13 16:29:42 -0700 | [diff] [blame] | 18 | |
| 19 | import cros_update_progress |
Congbin Guo | 3afae6c | 2019-08-13 16:29:42 -0700 | [diff] [blame] | 20 | |
Achuith Bhandarkar | 662fb72 | 2019-10-31 16:12:49 -0700 | [diff] [blame] | 21 | import setup_chromite # pylint: disable=unused-import |
| 22 | from chromite.lib.xbuddy import cherrypy_log_util |
Congbin Guo | 3afae6c | 2019-08-13 16:29:42 -0700 | [diff] [blame] | 23 | |
# psutil is optional: when it cannot be loaded, fall back to None so that the
# psutil-dependent code in this module is skipped instead of failing.
#   * ImportError: psutil is not installed. This is kept for backwards
#     compatibility, so "cros flash" can still update DUTs with builds that do
#     not have psutil installed. During cros flash, the local devserver code
#     is copied over to the DUT and devserver runs inside the DUT to stage the
#     build.
#   * OSError: psutil may not work properly in the builder, e.g.
#       OSError: [Errno 2] No such file or directory: '/dev/pts/0'
#     The error is ignored as load information of devserver is not used in
#     the builder.
try:
  import psutil
except (ImportError, OSError):
  psutil = None
| 37 | |
| 38 | |
def _Log(message, *args):
  """Log a message for this module under the 'HEALTHCHECKER' tag.

  Args:
    message: The message to log.
    *args: Extra positional arguments, forwarded unchanged to
        cherrypy_log_util.LogWithTag.

  Returns:
    Whatever cherrypy_log_util.LogWithTag returns.
  """
  tag = 'HEALTHCHECKER'
  return cherrypy_log_util.LogWithTag(tag, message, *args)
| 42 | |
# Number of seconds between the collection of disk and network IO counters.
STATS_INTERVAL = 10.0
# Divisor (10^9) used to convert the free disk space from bytes to GB.
_1G = 1000000000
| 46 | |
| 47 | |
def require_psutil():
  """Decorator factory for functions that require psutil to run.

  The produced decorator guards a function so that it is only called when the
  module-level name psutil is truthy (i.e. the import succeeded). Otherwise the
  call is skipped, a message is logged and None is returned.

  Returns:
    A decorator that takes the function to guard and returns the wrapper.
  """
  # Imported here so this helper is self-contained; functools is stdlib.
  import functools

  def deco_require_psutil(func):
    """Wrap func so that it is skipped when psutil is unavailable.

    Args:
      func: function to be called.

    Returns:
      A wrapper with the same name and docstring as func.
    """
    # functools.wraps keeps func's __name__/__doc__ on the wrapper, so the
    # decorated functions stay identifiable in logs and introspection.
    @functools.wraps(func)
    def func_require_psutil(*args, **kwargs):
      # If psutil is not installed, skip calling the function.
      if psutil:
        return func(*args, **kwargs)
      _Log('Python module psutil is not installed. Function call %s is '
           'skipped.' % func)
      # Callers must be prepared for None when the call was skipped.
      return None
    return func_require_psutil
  return deco_require_psutil
| 72 | |
| 73 | |
| 74 | def _get_process_count(process_cmd_pattern): |
| 75 | """Get the count of processes that match the given command pattern. |
| 76 | |
| 77 | Args: |
| 78 | process_cmd_pattern: The regex pattern of process command to match. |
| 79 | |
| 80 | Returns: |
| 81 | The count of processes that match the given command pattern. |
| 82 | """ |
| 83 | try: |
| 84 | # Use Popen instead of check_output since the latter cannot run with old |
| 85 | # python version (less than 2.7) |
| 86 | proc = subprocess.Popen( |
| 87 | ['pgrep', '-fc', process_cmd_pattern], |
| 88 | stdout=subprocess.PIPE, |
| 89 | stderr=subprocess.PIPE, |
| 90 | ) |
| 91 | cmd_output, cmd_error = proc.communicate() |
| 92 | if cmd_error: |
| 93 | _Log('Error happened when getting process count: %s' % cmd_error) |
| 94 | |
| 95 | return int(cmd_output) |
| 96 | except subprocess.CalledProcessError: |
| 97 | return 0 |
| 98 | |
| 99 | |
def get_config():
  """Build the cherrypy config for this application.

  Returns:
    A dict mapping the root path '/' to its cherrypy settings.
  """
  root_settings = {
      # Turn off cherrypy's trailing_slash tool for this application, so that
      # e.g. /check_health is not automatically redirected to /check_health/.
      'tools.trailing_slash.on': False,
  }
  return {'/': root_settings}
| 109 | |
| 110 | |
class Root(object):
  """Cherrypy Root class of the application.

  Serves the devserver health status as JSON. When psutil is available, a
  daemon thread keeps cached disk and network IO rates up to date.
  """

  def __init__(self, devserver, static_dir):
    """Initialize the health checker and start the IO stats collection.

    Args:
      devserver: Object whose staging_thread_count attribute is reported by
          index().
      static_dir: Path handed to os.statvfs() to measure the free disk space.
    """
    self._static_dir = static_dir
    self._devserver = devserver

    # Cache of disk IO stats, a thread refreshes the stats every
    # STATS_INTERVAL seconds. A lock is not used for these variables as the
    # only thread that writes to them is _refresh_io_stats.
    self.disk_read_bytes_per_sec = 0
    self.disk_write_bytes_per_sec = 0
    # Cache of network IO stats.
    self.network_sent_bytes_per_sec = 0
    self.network_recv_bytes_per_sec = 0
    self._start_io_stat_thread()

  @require_psutil()
  def _get_io_stats(self):
    """Get the IO stats as a dictionary.

    Returns:
      A dictionary with the cached disk/network IO rates and the current CPU
      usage reported by psutil. The call is skipped by require_psutil() when
      psutil is unavailable, so callers must handle a missing result.
    """
    return {'disk_read_bytes_per_second': self.disk_read_bytes_per_sec,
            'disk_write_bytes_per_second': self.disk_write_bytes_per_sec,
            'disk_total_bytes_per_second': (self.disk_read_bytes_per_sec +
                                            self.disk_write_bytes_per_sec),
            'network_sent_bytes_per_second': self.network_sent_bytes_per_sec,
            'network_recv_bytes_per_second': self.network_recv_bytes_per_sec,
            'network_total_bytes_per_second': (self.network_sent_bytes_per_sec +
                                               self.network_recv_bytes_per_sec),
            'cpu_percent': psutil.cpu_percent(), }

  @require_psutil()
  def _refresh_io_stats(self):
    """A call running in a thread to update IO stats periodically.

    Never returns: every STATS_INTERVAL seconds it samples the psutil IO
    counters and stores the per-second rates since the previous sample.
    """
    prev_disk_io_counters = psutil.disk_io_counters()
    prev_network_io_counters = psutil.net_io_counters()
    prev_read_time = time.time()
    while True:
      time.sleep(STATS_INTERVAL)
      # Use the measured elapsed time rather than STATS_INTERVAL, as the sleep
      # may last longer than requested.
      now = time.time()
      interval = now - prev_read_time
      prev_read_time = now
      # Disk IO is for all disks.
      disk_io_counters = psutil.disk_io_counters()
      network_io_counters = psutil.net_io_counters()

      self.disk_read_bytes_per_sec = (
          disk_io_counters.read_bytes -
          prev_disk_io_counters.read_bytes) / interval
      self.disk_write_bytes_per_sec = (
          disk_io_counters.write_bytes -
          prev_disk_io_counters.write_bytes) / interval
      prev_disk_io_counters = disk_io_counters

      self.network_sent_bytes_per_sec = (
          network_io_counters.bytes_sent -
          prev_network_io_counters.bytes_sent) / interval
      self.network_recv_bytes_per_sec = (
          network_io_counters.bytes_recv -
          prev_network_io_counters.bytes_recv) / interval
      prev_network_io_counters = network_io_counters

  @require_psutil()
  def _start_io_stat_thread(self):
    """Start the thread to collect IO stats."""
    thread = threading.Thread(target=self._refresh_io_stats)
    # Daemon thread: the endless refresh loop must not block interpreter exit.
    thread.daemon = True
    thread.start()

  @cherrypy.expose
  def index(self):
    """Collect the health status of devserver to see if it's ready for staging.

    Returns:
      A JSON dictionary containing all or some of the following fields:
      free_disk (float): free disk space in GB
      staging_thread_count (int): number of devserver threads currently staging
                                  an image
      apache_client_count (int): count of Apache processes.
      telemetry_test_count (int): count of telemetry tests.
      gsutil_count (int): count of gsutil processes.
      au_process_count (int): count of running AU processes.
      The fields returned by _get_io_stats() are added as well when psutil is
      available.
    """
    # Get free disk space. f_bavail is counted in units of f_frsize (the
    # fragment size), which may differ from f_bsize (the preferred IO block
    # size), so f_frsize is the correct multiplier.
    stat = os.statvfs(self._static_dir)
    free_disk = stat.f_frsize * stat.f_bavail / _1G
    apache_client_count = _get_process_count('bin/apache2? -k start')
    telemetry_test_count = _get_process_count('python.*telemetry')
    gsutil_count = _get_process_count('gsutil')
    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())

    health_data = {
        'free_disk': free_disk,
        'staging_thread_count': self._devserver.staging_thread_count,
        'apache_client_count': apache_client_count,
        'telemetry_test_count': telemetry_test_count,
        'gsutil_count': gsutil_count,
        'au_process_count': au_process_count,
    }
    # _get_io_stats() yields no dict when psutil is missing; fall back to {}.
    health_data.update(self._get_io_stats() or {})

    return json.dumps(health_data)