blob: 5d9ce201f94200c7627fa81c0f69789655a2f367 [file] [log] [blame]
Congbin Guo3afae6c2019-08-13 16:29:42 -07001# -*- coding: utf-8 -*-
2# Copyright 2019 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""A cherrypy application to check devserver health status."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import json
12import os
13import subprocess
14import threading
15import time
16
Amin Hassanid4e35392019-10-03 11:02:44 -070017import cherrypy # pylint: disable=import-error
Amin Hassanie427e212019-10-28 11:04:27 -070018import psutil # pylint: disable=import-error
Congbin Guo3afae6c2019-08-13 16:29:42 -070019
Achuith Bhandarkar662fb722019-10-31 16:12:49 -070020import setup_chromite # pylint: disable=unused-import
Amin Hassanie427e212019-10-28 11:04:27 -070021from chromite.lib import cros_update_progress
Achuith Bhandarkar662fb722019-10-31 16:12:49 -070022from chromite.lib.xbuddy import cherrypy_log_util
Congbin Guo3afae6c2019-08-13 16:29:42 -070023
Congbin Guo3afae6c2019-08-13 16:29:42 -070024
def _Log(message, *args):
  """Log a message under this module's 'HEALTHCHECKER' tag.

  Args:
    message: The message handed to cherrypy_log_util.LogWithTag.
    *args: Extra positional arguments forwarded unchanged.

  Returns:
    Whatever cherrypy_log_util.LogWithTag returns.
  """
  tag = 'HEALTHCHECKER'
  return cherrypy_log_util.LogWithTag(tag, message, *args)
28
Congbin Guo3afae6c2019-08-13 16:29:42 -070029# Number of seconds between the collection of disk and network IO counters.
30STATS_INTERVAL = 10.0
31_1G = 1000000000
32
33
def require_psutil():
  """Build a decorator for functions that require psutil to run.

  Returns:
    A decorator. The decorated function forwards its arguments to the
    original function when the module-level name psutil is truthy; otherwise
    it logs that the call was skipped and returns None.
  """
  # functools is imported here because the module's import list does not
  # include it.
  import functools

  def deco_require_psutil(func):
    """Wrap func so that it is skipped when psutil is not available.

    Args:
      func: function to be called.

    Returns:
      The guarded wrapper around func.
    """
    # functools.wraps keeps func's __name__ and __doc__ on the wrapper, so
    # decorated functions stay identifiable in logs and tracebacks.
    @functools.wraps(func)
    def func_require_psutil(*args, **kwargs):
      """Call func only when psutil is available.

      If psutil is not installed, skip calling the function.

      Args:
        *args: arguments for function to be called.
        **kwargs: keyword arguments for function to be called.

      Returns:
        The return value of func, or None when the call is skipped.
      """
      if psutil:
        return func(*args, **kwargs)

      _Log('Python module psutil is not installed. Function call %s is '
           'skipped.' % func)
      return None
    return func_require_psutil
  return deco_require_psutil
58
59
60def _get_process_count(process_cmd_pattern):
61 """Get the count of processes that match the given command pattern.
62
63 Args:
64 process_cmd_pattern: The regex pattern of process command to match.
65
66 Returns:
67 The count of processes that match the given command pattern.
68 """
69 try:
70 # Use Popen instead of check_output since the latter cannot run with old
71 # python version (less than 2.7)
72 proc = subprocess.Popen(
73 ['pgrep', '-fc', process_cmd_pattern],
74 stdout=subprocess.PIPE,
75 stderr=subprocess.PIPE,
76 )
77 cmd_output, cmd_error = proc.communicate()
78 if cmd_error:
79 _Log('Error happened when getting process count: %s' % cmd_error)
80
81 return int(cmd_output)
82 except subprocess.CalledProcessError:
83 return 0
84
85
def get_config():
  """Return the cherrypy configuration dictionary for this application."""
  root_settings = {
      # Whether to automatically add a trailing slash (i.e.
      # /check_health -> /check_health/); switched off here.
      'tools.trailing_slash.on': False,
  }
  return {'/': root_settings}
95
96
class Root(object):
  """Cherrypy Root class of the application.

  Serves a JSON health report and keeps cached disk/network throughput
  figures that a background thread refreshes every STATS_INTERVAL seconds.
  """

  def __init__(self, devserver, static_dir):
    """Store the collaborators and start the IO stats thread.

    Args:
      devserver: Object whose staging_thread_count attribute is reported by
        index().
      static_dir: Path handed to os.statvfs to measure free disk space.
    """
    self._static_dir = static_dir
    self._devserver = devserver

    # Cache of disk IO stats; a thread refreshes the stats every
    # STATS_INTERVAL seconds. No lock is used for these variables as the only
    # thread that writes to them is _refresh_io_stats.
    self.disk_read_bytes_per_sec = 0
    self.disk_write_bytes_per_sec = 0
    # Cache of network IO stats.
    self.network_sent_bytes_per_sec = 0
    self.network_recv_bytes_per_sec = 0
    self._start_io_stat_thread()

  @require_psutil()
  def _get_io_stats(self):
    """Get the IO stats as a dictionary.

    Returns:
      A dictionary holding the cached disk and network rates (read/write/
      total and sent/recv/total, in bytes per second) plus the value of
      psutil.cpu_percent().
    """
    disk_read = self.disk_read_bytes_per_sec
    disk_write = self.disk_write_bytes_per_sec
    net_sent = self.network_sent_bytes_per_sec
    net_recv = self.network_recv_bytes_per_sec
    return {
        'disk_read_bytes_per_second': disk_read,
        'disk_write_bytes_per_second': disk_write,
        'disk_total_bytes_per_second': disk_read + disk_write,
        'network_sent_bytes_per_second': net_sent,
        'network_recv_bytes_per_second': net_recv,
        'network_total_bytes_per_second': net_sent + net_recv,
        'cpu_percent': psutil.cpu_percent(),
    }

  @require_psutil()
  def _refresh_io_stats(self):
    """A call running in a thread to update IO stats periodically.

    Loops forever: sleeps STATS_INTERVAL seconds, re-reads the psutil disk
    and network counters, and stores the per-second deltas on self.
    """
    prev_disk_io_counters = psutil.disk_io_counters()
    prev_network_io_counters = psutil.net_io_counters()
    prev_read_time = time.time()
    while True:
      time.sleep(STATS_INTERVAL)
      now = time.time()
      interval = now - prev_read_time
      prev_read_time = now
      # Disk IO is for all disks.
      disk_io_counters = psutil.disk_io_counters()
      network_io_counters = psutil.net_io_counters()

      # time.time() is wall-clock time and can be stepped backwards. A zero
      # interval would raise ZeroDivisionError and end this thread, and a
      # negative one would publish negative rates, so such a sample is
      # skipped and the previously cached rates are kept.
      if interval > 0:
        self.disk_read_bytes_per_sec = (
            disk_io_counters.read_bytes -
            prev_disk_io_counters.read_bytes) / interval
        self.disk_write_bytes_per_sec = (
            disk_io_counters.write_bytes -
            prev_disk_io_counters.write_bytes) / interval
        self.network_sent_bytes_per_sec = (
            network_io_counters.bytes_sent -
            prev_network_io_counters.bytes_sent) / interval
        self.network_recv_bytes_per_sec = (
            network_io_counters.bytes_recv -
            prev_network_io_counters.bytes_recv) / interval

      prev_disk_io_counters = disk_io_counters
      prev_network_io_counters = network_io_counters

  @require_psutil()
  def _start_io_stat_thread(self):
    """Start the thread to collect IO stats."""
    thread = threading.Thread(target=self._refresh_io_stats)
    # Daemon thread: the endless refresh loop must not keep the process alive.
    thread.daemon = True
    thread.start()

  @cherrypy.expose
  def index(self):
    """Collect the health status of devserver to see if it's ready for staging.

    Returns:
      A JSON dictionary containing all or some of the following fields:
      free_disk (number): free disk space in GB
      staging_thread_count (int): number of devserver threads currently staging
          an image
      apache_client_count (int): count of Apache processes.
      telemetry_test_count (int): count of telemetry tests.
      gsutil_count (int): count of gsutil processes.
      au_process_count (int): number of entries returned by
          cros_update_progress.GetAllRunningAUProcess().
      The fields of _get_io_stats() (disk/network bytes per second and
      cpu_percent) are merged in when that call returns a dictionary.
    """
    # Get free disk space. f_bavail is a count of f_frsize-sized blocks, so
    # the byte total is f_frsize * f_bavail (f_bsize is only the preferred IO
    # block size and is not guaranteed to match).
    stat = os.statvfs(self._static_dir)
    free_disk = stat.f_frsize * stat.f_bavail / _1G
    apache_client_count = _get_process_count('bin/apache2? -k start')
    telemetry_test_count = _get_process_count('python.*telemetry')
    gsutil_count = _get_process_count('gsutil')
    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())

    health_data = {
        'free_disk': free_disk,
        'staging_thread_count': self._devserver.staging_thread_count,
        'apache_client_count': apache_client_count,
        'telemetry_test_count': telemetry_test_count,
        'gsutil_count': gsutil_count,
        'au_process_count': au_process_count,
    }
    # _get_io_stats() returns None when the psutil guard skips the call.
    health_data.update(self._get_io_stats() or {})

    return json.dumps(health_data)
199 return json.dumps(health_data)