devserver: refactor check_health to a separate app
This CL moves all /check_health related code to a separate module and
wraps it as a standalone cherrypy application.
BUG=chromium:993621
TEST=1. Ran devserver_integrated_test.py in chroot
2. Ran below command locally to verify:
$ curl http://127.0.0.1:8080/check_health
Change-Id: Ic9c483202a2cef54d6953924e90001c9937a4fa8
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/1753193
Tested-by: Congbin Guo <guocb@chromium.org>
Auto-Submit: Congbin Guo <guocb@chromium.org>
Reviewed-by: C Shapiro <shapiroc@chromium.org>
Commit-Queue: Congbin Guo <guocb@chromium.org>
diff --git a/health_checker.py b/health_checker.py
new file mode 100644
index 0000000..8c274a3
--- /dev/null
+++ b/health_checker.py
@@ -0,0 +1,217 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+"""A cherrypy application to check devserver health status."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import os
+import subprocess
+import threading
+import time
+
+import cherrypy
+
+import cros_update_progress
+import log_util
+
+
+def _Log(message, *args):
+  """Log |message| under this module's 'HEALTHCHECKER' tag.
+
+  Args:
+    message: Message (or format string) handed to log_util.LogWithTag.
+    *args: Extra positional arguments forwarded unchanged.
+
+  Returns:
+    Whatever log_util.LogWithTag returns.
+  """
+  tag = 'HEALTHCHECKER'
+  return log_util.LogWithTag(tag, message, *args)
+
+
+# psutil stays None unless the import below succeeds; code in this module
+# tests its truthiness before using it.
+psutil = None
+try:
+  import psutil
+except ImportError:
+  # A missing psutil is tolerated for backwards compatibility, so that
+  # "cros flash" can still update DUTs from a build that does not ship
+  # psutil: during cros flash the local devserver code is copied over to the
+  # DUT and the devserver runs inside the DUT to stage the build.
+  _Log('Python module psutil is not installed, devserver load data will not '
+       'be collected')
+except OSError as err:
+  # psutil may not work properly in a builder and fail with errors such as
+  #   OSError: [Errno 2] No such file or directory: '/dev/pts/0'
+  # This is tolerated too because the devserver load information is not used
+  # in builders.
+  _Log('psutil is failed to be imported, error: %s. devserver load data will '
+       'not be collected.', err)
+
+
+# Seconds to wait between two successive samples of the disk and network IO
+# counters.
+STATS_INTERVAL = 10.0
+# Number of bytes in one (decimal) gigabyte; divisor for reporting free disk.
+_1G = 10 ** 9
+
+
+def require_psutil():
+  """Build a decorator for functions that require psutil to run.
+
+  Returns:
+    A decorator. The decorated function calls through to the original when
+    the module-level psutil is truthy; otherwise it logs that the call is
+    skipped and returns None.
+  """
+  # Imported locally so this helper does not rely on the module import list.
+  import functools
+
+  def deco_require_psutil(func):
+    """Wrap |func| with the psutil availability check.
+
+    Args:
+      func: function to be called.
+
+    Returns:
+      The guarded wrapper, which keeps the name and docstring of |func|.
+    """
+    # functools.wraps copies __name__/__doc__ from func onto the wrapper, so
+    # the wrapper is documented by func's own docstring.
+    @functools.wraps(func)
+    def func_require_psutil(*args, **kwargs):
+      if not psutil:
+        # If psutil is not installed, skip calling the function.
+        _Log('Python module psutil is not installed. Function call %s is '
+             'skipped.', func)
+        return None
+      return func(*args, **kwargs)
+    return func_require_psutil
+  return deco_require_psutil
+
+
+def _get_process_count(process_cmd_pattern):
+  """Get the count of processes that match the given command pattern.
+
+  Runs `pgrep -fc <pattern>` and parses the number it prints on stdout.
+
+  Args:
+    process_cmd_pattern: The regex pattern of process command to match.
+
+  Returns:
+    The count of processes that match the given command pattern, or 0 when
+    pgrep cannot be started or does not print a number.
+  """
+  try:
+    # Use Popen instead of check_output since the latter cannot run with old
+    # python version (less than 2.7)
+    proc = subprocess.Popen(
+        ['pgrep', '-fc', process_cmd_pattern],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+    cmd_output, cmd_error = proc.communicate()
+  except OSError as e:
+    # Popen and communicate never raise CalledProcessError. Failing to start
+    # pgrep (for example because it is not installed) raises OSError.
+    _Log('Failed to run pgrep to get process count: %s' % e)
+    return 0
+
+  if cmd_error:
+    _Log('Error happened when getting process count: %s' % cmd_error)
+
+  try:
+    return int(cmd_output)
+  except ValueError:
+    # pgrep printed nothing parsable as a count; report no matching process
+    # rather than failing the whole health check.
+    return 0
+
+
+def get_config():
+  """Build the cherrypy config for this application.
+
+  Returns:
+    A new dict keyed by URL path; the root path '/' holds the tool settings.
+  """
+  # Switch cherrypy's trailing_slash tool off for the whole application,
+  # i.e. do NOT automatically turn /check_health into /check_health/.
+  root_settings = {'tools.trailing_slash.on': False}
+  return {'/': root_settings}
+
+
+class Root(object):
+  """Cherrypy Root class of the application.
+
+  index() reports the devserver health status as JSON. When psutil is
+  available, a daemon thread keeps the cached disk and network IO rates up to
+  date so that they can be included in the report.
+  """
+
+  def __init__(self, devserver, static_dir):
+    """Store the collaborators and start collecting IO stats.
+
+    Args:
+      devserver: Object whose staging_thread_count attribute is reported.
+      static_dir: Path handed to os.statvfs to measure the free disk space.
+    """
+    self._static_dir = static_dir
+    self._devserver = devserver
+
+    # Cache of disk IO stats, refreshed by a thread every STATS_INTERVAL
+    # seconds. No lock is used for these variables because the only thread
+    # that writes to them is the one running _refresh_io_stats.
+    self.disk_read_bytes_per_sec = 0
+    self.disk_write_bytes_per_sec = 0
+    # Cache of network IO stats.
+    self.network_sent_bytes_per_sec = 0
+    self.network_recv_bytes_per_sec = 0
+    self._start_io_stat_thread()
+
+  @require_psutil()
+  def _get_io_stats(self):
+    """Get the IO stats as a dictionary.
+
+    The disk and network rates come from the cached attributes; cpu_percent
+    is read from psutil at call time.
+
+    Returns:
+      A dictionary of IO stats collected by psutil.
+    """
+    return {'disk_read_bytes_per_second': self.disk_read_bytes_per_sec,
+            'disk_write_bytes_per_second': self.disk_write_bytes_per_sec,
+            'disk_total_bytes_per_second': (self.disk_read_bytes_per_sec +
+                                            self.disk_write_bytes_per_sec),
+            'network_sent_bytes_per_second': self.network_sent_bytes_per_sec,
+            'network_recv_bytes_per_second': self.network_recv_bytes_per_sec,
+            'network_total_bytes_per_second': (self.network_sent_bytes_per_sec +
+                                               self.network_recv_bytes_per_sec),
+            'cpu_percent': psutil.cpu_percent(), }
+
+  @require_psutil()
+  def _refresh_io_stats(self):
+    """A call running in a thread to update IO stats periodically.
+
+    Loops forever: sleeps STATS_INTERVAL seconds, then turns the change of
+    the psutil disk/network counters since the previous sample into
+    bytes-per-second rates stored on this object.
+    """
+    prev_disk_io_counters = psutil.disk_io_counters()
+    prev_network_io_counters = psutil.net_io_counters()
+    prev_read_time = time.time()
+    while True:
+      time.sleep(STATS_INTERVAL)
+      now = time.time()
+      interval = now - prev_read_time
+      # Disk IO is for all disks.
+      disk_io_counters = psutil.disk_io_counters()
+      network_io_counters = psutil.net_io_counters()
+
+      # time.time() follows the wall clock, so a clock adjustment can make the
+      # interval zero or negative. Keep the previous rates for this round
+      # rather than dividing by such an interval.
+      if interval > 0:
+        self.disk_read_bytes_per_sec = (
+            disk_io_counters.read_bytes -
+            prev_disk_io_counters.read_bytes) / interval
+        self.disk_write_bytes_per_sec = (
+            disk_io_counters.write_bytes -
+            prev_disk_io_counters.write_bytes) / interval
+        self.network_sent_bytes_per_sec = (
+            network_io_counters.bytes_sent -
+            prev_network_io_counters.bytes_sent) / interval
+        self.network_recv_bytes_per_sec = (
+            network_io_counters.bytes_recv -
+            prev_network_io_counters.bytes_recv) / interval
+
+      # The latest sample always becomes the baseline of the next round.
+      prev_read_time = now
+      prev_disk_io_counters = disk_io_counters
+      prev_network_io_counters = network_io_counters
+
+  @require_psutil()
+  def _start_io_stat_thread(self):
+    """Start the thread to collect IO stats."""
+    thread = threading.Thread(target=self._refresh_io_stats)
+    # Daemon thread: the endless refresh loop must not keep the process alive.
+    thread.daemon = True
+    thread.start()
+
+  @cherrypy.expose
+  def index(self):
+    """Collect the health status of devserver to see if it's ready for staging.
+
+    Returns:
+      A JSON dictionary containing all or some of the following fields:
+      free_disk (int): free disk space in GB
+      staging_thread_count (int): number of devserver threads currently staging
+                                  an image
+      apache_client_count (int): count of Apache processes.
+      telemetry_test_count (int): count of telemetry tests.
+      gsutil_count (int): count of gsutil processes.
+      au_process_count (int): number of entries returned by
+                              cros_update_progress.GetAllRunningAUProcess().
+      When psutil is available, the fields of _get_io_stats() are included
+      as well: disk_{read,write,total}_bytes_per_second,
+      network_{sent,recv,total}_bytes_per_second and cpu_percent.
+    """
+    # Get free disk space. Floor division keeps free_disk an int as
+    # documented: this module enables true division, so '/' yields a float.
+    stat = os.statvfs(self._static_dir)
+    free_disk = stat.f_bsize * stat.f_bavail // _1G
+    apache_client_count = _get_process_count('bin/apache2? -k start')
+    telemetry_test_count = _get_process_count('python.*telemetry')
+    gsutil_count = _get_process_count('gsutil')
+    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())
+
+    health_data = {
+        'free_disk': free_disk,
+        'staging_thread_count': self._devserver.staging_thread_count,
+        'apache_client_count': apache_client_count,
+        'telemetry_test_count': telemetry_test_count,
+        'gsutil_count': gsutil_count,
+        'au_process_count': au_process_count,
+    }
+    # _get_io_stats() returns None when psutil is unavailable.
+    health_data.update(self._get_io_stats() or {})
+
+    return json.dumps(health_data)