blob: 8c274a3097b326410245633efa553da180de92d3 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2019 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A cherrypy application to check devserver health status."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import json
12import os
13import subprocess
14import threading
15import time
16
17import cherrypy
18
19import cros_update_progress
20import log_util
21
22
def _Log(message, *args):
  """Forward a log message to log_util under this module's tag.

  Args:
    message: the message passed through to log_util.LogWithTag.
    *args: extra positional arguments passed through unchanged.

  Returns:
    Whatever log_util.LogWithTag returns.
  """
  tag = 'HEALTHCHECKER'
  return log_util.LogWithTag(tag, message, *args)
26
27
# psutil is an optional dependency. When it cannot be imported, the name
# psutil is bound to None and functions that need it are skipped.
try:
  import psutil
except ImportError:
  # Ignore psutil import failure. This is for backwards compatibility, so that
  # "cros flash" can still update DUTs with builds that do not have psutil
  # installed: during cros flash, local devserver code is copied over to the
  # DUT, and devserver runs inside the DUT to stage the build.
  _Log('Python module psutil is not installed, devserver load data will not be '
       'collected')
  psutil = None
except OSError as e:
  # psutil may not work properly in a builder and can fail with an error like
  # the one below. Ignore it, as load information of devserver is not used in
  # builders.
  #   OSError: [Errno 2] No such file or directory: '/dev/pts/0'
  _Log('psutil is failed to be imported, error: %s. devserver load data will '
       'not be collected.', e)
  psutil = None
45
46
# Seconds to wait between two consecutive samples of the disk and network IO
# counters.
STATS_INTERVAL = 10.0
# Number of bytes in one (decimal) gigabyte.
_1G = 10 ** 9
50
51
def require_psutil():
  """Decorator factory for functions that require psutil to run.

  Returns:
    A decorator. A function wrapped by it is called normally when the
    module-level psutil is truthy. Otherwise the call is skipped, a message
    is logged and None is returned.
  """
  # Imported here so this function carries its own dependency.
  import functools

  def deco_require_psutil(func):
    """Wrap func so that it only runs when psutil is available.

    Args:
      func: function to be called.

    Returns:
      The wrapping function, carrying func's name and docstring.
    """
    # functools.wraps keeps func's __name__ and __doc__ on the wrapper, so
    # decorated functions remain identifiable in logs and tracebacks.
    @functools.wraps(func)
    def func_require_psutil(*args, **kwargs):
      # If psutil is not installed, skip calling the function.
      if psutil:
        return func(*args, **kwargs)
      # Pass func as a log argument, as the other _Log calls in this module
      # do, instead of formatting the message eagerly.
      _Log('Python module psutil is not installed. Function call %s is '
           'skipped.', func)
      return None
    return func_require_psutil
  return deco_require_psutil
76
77
def _get_process_count(process_cmd_pattern):
  """Get the count of processes that match the given command pattern.

  Runs `pgrep -fc <pattern>` and parses the count it prints on stdout.

  Args:
    process_cmd_pattern: The regex pattern of process command to match.

  Returns:
    The count of processes that match the given command pattern. 0 is
    returned when pgrep cannot be executed or when its output is not a
    number.
  """
  try:
    # Use Popen instead of check_output since the latter cannot run with old
    # python version (less than 2.7).
    proc = subprocess.Popen(
        ['pgrep', '-fc', process_cmd_pattern],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    cmd_output, cmd_error = proc.communicate()
  except OSError as e:
    # Popen signals a missing or non-executable pgrep with OSError. It never
    # raises CalledProcessError, which only check_call/check_output use.
    _Log('Failed to run pgrep to get process count: %s', e)
    return 0

  if cmd_error:
    _Log('Error happened when getting process count: %s', cmd_error)

  try:
    return int(cmd_output)
  except ValueError:
    # stdout was empty or not a number (presumably pgrep itself failed), so
    # report no matching processes rather than failing the health check.
    _Log('Unexpected pgrep output when getting process count: %r', cmd_output)
    return 0
102
103
def get_config():
  """Build the cherrypy config for this application.

  Returns:
    A new dictionary mapping the root path '/' to its cherrypy settings.
  """
  root_settings = {
      # Turn off the trailing_slash tool, which would otherwise automatically
      # add a trailing slash, i.e. /check_health -> /check_health/.
      'tools.trailing_slash.on': False,
  }
  return {'/': root_settings}
113
114
class Root(object):
  """Cherrypy Root class of the application.

  The index page reports the devserver health status as JSON. When psutil is
  available, a daemon thread keeps the cached disk and network IO rates up to
  date.
  """

  def __init__(self, devserver, static_dir):
    """Store the collaborators and start the IO stats thread.

    Args:
      devserver: object whose staging_thread_count attribute is reported by
          index().
      static_dir: path passed to os.statvfs() to compute the free disk space.
    """
    self._static_dir = static_dir
    self._devserver = devserver

    # Cache of disk IO stats; a thread refreshes the stats every
    # STATS_INTERVAL seconds. No lock is used for these variables, as the only
    # thread that writes to them is the one running _refresh_io_stats.
    self.disk_read_bytes_per_sec = 0
    self.disk_write_bytes_per_sec = 0
    # Cache of network IO stats.
    self.network_sent_bytes_per_sec = 0
    self.network_recv_bytes_per_sec = 0
    self._start_io_stat_thread()

  @require_psutil()
  def _get_io_stats(self):
    """Get the IO stats as a dictionary.

    Returns:
      A dictionary with the cached disk and network rates (bytes per second),
      their totals, and the CPU percentage read from psutil.
    """
    disk_read = self.disk_read_bytes_per_sec
    disk_write = self.disk_write_bytes_per_sec
    network_sent = self.network_sent_bytes_per_sec
    network_recv = self.network_recv_bytes_per_sec
    return {
        'disk_read_bytes_per_second': disk_read,
        'disk_write_bytes_per_second': disk_write,
        'disk_total_bytes_per_second': disk_read + disk_write,
        'network_sent_bytes_per_second': network_sent,
        'network_recv_bytes_per_second': network_recv,
        'network_total_bytes_per_second': network_sent + network_recv,
        'cpu_percent': psutil.cpu_percent(),
    }

  @require_psutil()
  def _refresh_io_stats(self):
    """A call running in a thread to update IO stats periodically.

    Loops forever: sleeps STATS_INTERVAL seconds, samples the psutil disk and
    network counters, and stores the per-second rates since the previous
    sample on this object.
    """
    prev_disk_io_counters = psutil.disk_io_counters()
    prev_network_io_counters = psutil.net_io_counters()
    prev_read_time = time.time()
    while True:
      time.sleep(STATS_INTERVAL)
      now = time.time()
      interval = now - prev_read_time
      prev_read_time = now
      # Disk IO is for all disks.
      disk_io_counters = psutil.disk_io_counters()
      network_io_counters = psutil.net_io_counters()

      # time.time() follows the wall clock, which can be stepped backwards.
      # Only publish new rates for a positive interval, so that a clock
      # adjustment can neither raise ZeroDivisionError (which would end this
      # thread) nor produce negative rates. The cached values are kept as-is
      # in that case.
      if interval > 0:
        self.disk_read_bytes_per_sec = (
            disk_io_counters.read_bytes -
            prev_disk_io_counters.read_bytes) / interval
        self.disk_write_bytes_per_sec = (
            disk_io_counters.write_bytes -
            prev_disk_io_counters.write_bytes) / interval
        self.network_sent_bytes_per_sec = (
            network_io_counters.bytes_sent -
            prev_network_io_counters.bytes_sent) / interval
        self.network_recv_bytes_per_sec = (
            network_io_counters.bytes_recv -
            prev_network_io_counters.bytes_recv) / interval

      # Always move the baseline forward so the next rates cover exactly one
      # interval.
      prev_disk_io_counters = disk_io_counters
      prev_network_io_counters = network_io_counters

  @require_psutil()
  def _start_io_stat_thread(self):
    """Start the thread to collect IO stats."""
    thread = threading.Thread(target=self._refresh_io_stats)
    # Daemon thread: it loops forever and must not block interpreter exit.
    thread.daemon = True
    thread.start()

  @cherrypy.expose
  def index(self):
    """Collect the health status of devserver to see if it's ready for staging.

    Returns:
      A JSON dictionary containing all or some of the following fields:
      free_disk (float): free disk space in GB (10^9 bytes).
      staging_thread_count (int): number of devserver threads currently staging
                                  an image
      apache_client_count (int): count of Apache processes.
      telemetry_test_count (int): count of telemetry tests.
      gsutil_count (int): count of gsutil processes.
      au_process_count (int): number of entries returned by
                              cros_update_progress.GetAllRunningAUProcess().
      The fields of _get_io_stats() are included as well when psutil is
      available.
    """
    # Get free disk space. f_bavail counts blocks of f_frsize bytes (the
    # fragment size), so f_frsize, not f_bsize, is the right multiplier.
    stat = os.statvfs(self._static_dir)
    free_disk = stat.f_frsize * stat.f_bavail / _1G
    apache_client_count = _get_process_count('bin/apache2? -k start')
    telemetry_test_count = _get_process_count('python.*telemetry')
    gsutil_count = _get_process_count('gsutil')
    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())

    health_data = {
        'free_disk': free_disk,
        'staging_thread_count': self._devserver.staging_thread_count,
        'apache_client_count': apache_client_count,
        'telemetry_test_count': telemetry_test_count,
        'gsutil_count': gsutil_count,
        'au_process_count': au_process_count,
    }
    # _get_io_stats() returns None when psutil is unavailable.
    health_data.update(self._get_io_stats() or {})

    return json.dumps(health_data)
217 return json.dumps(health_data)