scripts: sysmon: add cpu times metrics for processes
BUG=b:255782067
TEST=Ran the unit tests.
Change-Id: I3fc5288f50ca11a358837be72fe9d78e9fa2a588
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/4242821
Tested-by: Congbin Guo <guocb@chromium.org>
Reviewed-by: Sergey Fetisov <sfetisov@google.com>
Auto-Submit: Congbin Guo <guocb@chromium.org>
Commit-Queue: Congbin Guo <guocb@chromium.org>
diff --git a/scripts/sysmon/proc_metrics.py b/scripts/sysmon/proc_metrics.py
index 32e12d0..2488c29 100644
--- a/scripts/sysmon/proc_metrics.py
+++ b/scripts/sysmon/proc_metrics.py
@@ -25,6 +25,10 @@
_cpu_percent_metric = metrics.GaugeMetric(
"proc/cpu_percent", description="CPU usage percent of processes."
)
+_cpu_times_metric = metrics.CumulativeMetric(
+ "proc/cpu_times",
+ description="Accumulated CPU time in each specific mode of processes.",
+)
def collect_proc_info():
@@ -35,6 +39,10 @@
class _ProcessMetricsCollector(object):
"""Class for collecting process metrics."""
+ # We need to store some per-process metrics from the last run in order
+ # to calculate the delta and aggregate them.
+ old_cpu_times = {}
+
def __init__(self):
self._metrics = [
_ProcessMetric("adb", test_func=partial(_is_process_name, "adb")),
@@ -105,9 +113,12 @@
self._other_metric = _ProcessMetric("other")
def collect(self):
+ new_cpu_times = {}
for proc in psutil.process_iter():
+ new_cpu_times[proc.pid] = proc.cpu_times()
self._collect_proc(proc)
self._flush()
+ _ProcessMetricsCollector.old_cpu_times = new_cpu_times
def _collect_proc(self, proc):
for metric in self._metrics:
@@ -141,6 +152,7 @@
self._count = 0
self._thread_count = 0
self._cpu_percent = 0
+ self._cpu_times = _CPUTimes()
def add(self, proc):
"""Do metric collection for the given process.
@@ -152,6 +164,11 @@
self._count += 1
self._thread_count += proc.num_threads()
self._cpu_percent += proc.cpu_percent()
+
+ self._cpu_times += _CPUTimes(
+ proc.cpu_times()
+ ) - _ProcessMetricsCollector.old_cpu_times.get(proc.pid)
+
return True
def flush(self):
@@ -167,6 +184,55 @@
)
self._cpu_percent = 0
+ for mode, t in self._cpu_times.asdict().items():
+ _cpu_times_metric.increment_by(
+ t, fields={**self._fields, "mode": mode}
+ )
+ self._cpu_times = _CPUTimes()
+
+
+class _CPUTimes(object):
+ """A container for CPU times metrics."""
+
+ def __init__(self, v=None):
+ self.system = v.system if v else 0
+ self.user = v.user if v else 0
+ self.iowait = v.iowait if v else 0
+ self.children_system = v.children_system if v else 0
+ self.children_user = v.children_user if v else 0
+
+ def __sub__(self, rhs):
+ if not rhs:
+ return self
+
+ r = _CPUTimes()
+ r.system = self.system - rhs.system
+ r.user = self.user - rhs.user
+ r.iowait = self.iowait - rhs.iowait
+ r.children_system = self.children_system - rhs.children_system
+ r.children_user = self.children_user - rhs.children_user
+ return r
+
+ def __iadd__(self, rhs):
+ if not rhs:
+ return self
+
+ self.system += rhs.system
+ self.user += rhs.user
+ self.iowait += rhs.iowait
+ self.children_system += rhs.children_system
+ self.children_user += rhs.children_user
+ return self
+
+ def asdict(self):
+ return {
+ "system": self.system,
+ "user": self.user,
+ "iowait": self.iowait,
+ "children_system": self.children_system,
+ "children_user": self.children_user,
+ }
+
def _is_parent_autoserv(proc):
"""Return whether proc is a parent (not forked) autoserv process."""