blob: 01b0e679a609425f867c08678ff8224bc3d4d79f [file] [log] [blame]
# Copyright 2017 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5"""Process metrics."""
6
7from __future__ import absolute_import
Allen Li51bb6122017-06-21 12:04:13 -07008
Allen Li3992c662018-01-05 15:26:36 -08009from functools import partial
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Li3992c662018-01-05 15:26:36 -080011
Mike Frysingercb56b642019-08-25 15:33:08 -040012import psutil # pylint: disable=import-error
Allen Li51bb6122017-06-21 12:04:13 -070013
Allen Lia9c6e802017-07-11 15:42:47 -070014from chromite.lib import metrics
Allen Li51bb6122017-06-21 12:04:13 -070015
Chris McDonald59650c32021-07-20 15:29:28 -060016
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Gauge fed with the number of processes counted for a "process_name" field.
_count_metric = metrics.GaugeMetric(
    "proc/count",
    description="Number of processes currently running.",
)
# Gauge fed with the summed CPU percent of the processes counted for a
# "process_name" field.
_cpu_percent_metric = metrics.GaugeMetric(
    "proc/cpu_percent",
    description="CPU usage percent of processes.",
)
Allen Li51bb6122017-06-21 12:04:13 -070025
26
def collect_proc_info():
    """Take one snapshot of the per-process metrics and send it.

    A fresh collector is created on every call; it walks the running
    processes once and then flushes the gathered values.
    """
    _ProcessMetricsCollector().collect()
Allen Li6bb74d52017-06-22 14:44:53 -070030
31
class _ProcessMetricsCollector(object):
    """Class for collecting process metrics.

    Each running process is attributed to the first entry of an ordered list
    of named categories whose test function accepts it.  A process accepted by
    none of them is counted under the catch-all "other" category.
    """

    def __init__(self):
        # Order matters: the first metric whose test accepts a process wins.
        self._metrics = [
            _ProcessMetric("autoserv", test_func=_is_parent_autoserv),
            _ProcessMetric(
                "cache-downloader",
                test_func=partial(_is_process_name, "downloader"),
            ),
            _ProcessMetric("cipd", test_func=partial(_is_process_name, "cipd")),
            _ProcessMetric(
                "common-tls", test_func=partial(_is_process_name, "common-tls")
            ),
            _ProcessMetric("curl", test_func=partial(_is_process_name, "curl")),
            _ProcessMetric(
                "dnsmasq", test_func=partial(_is_process_name, "dnsmasq")
            ),
            _ProcessMetric(
                "drone-agent",
                test_func=partial(_is_process_name, "drone-agent"),
            ),
            _ProcessMetric(
                "fleet-tlw", test_func=partial(_is_process_name, "fleet-tlw")
            ),
            _ProcessMetric(
                "getty", test_func=partial(_is_process_name, "getty")
            ),
            _ProcessMetric(
                "gs_offloader",
                test_func=partial(_is_process_name, "gs_offloader.py"),
            ),
            _ProcessMetric("gsutil", test_func=_is_gsutil),
            _ProcessMetric("java", test_func=partial(_is_process_name, "java")),
            _ProcessMetric(
                "labservice", test_func=partial(_is_process_name, "labservice")
            ),
            _ProcessMetric(
                "lxc-attach", test_func=partial(_is_process_name, "lxc-attach")
            ),
            _ProcessMetric(
                "lxc-start", test_func=partial(_is_process_name, "lxc-start")
            ),
            _ProcessMetric(
                "podman-pull", test_func=partial(_is_podman, "pull")
            ),
            _ProcessMetric("podman-run", test_func=partial(_is_podman, "run")),
            _ProcessMetric("sshd", test_func=partial(_is_process_name, "sshd")),
            _ProcessMetric("swarming_bot", test_func=_is_swarming_bot),
            _ProcessMetric(
                "sysmon",
                test_func=partial(_is_python_module, "chromite.scripts.sysmon"),
            ),
            _ProcessMetric("tko_proxy", test_func=_is_tko_proxy),
        ]
        # Catch-all for processes that none of the metrics above accepted.
        self._other_metric = _ProcessMetric("other")

    def collect(self):
        """Inspect every running process once, then send the metrics.

        A process can exit between being listed and being inspected.  Such a
        process is skipped so that it cannot abort the whole collection and
        prevent the remaining metrics from being sent.
        """
        for proc in psutil.process_iter():
            try:
                self._collect_proc(proc)
            except psutil.NoSuchProcess:
                logger.debug(
                    "Skipping process %s: it exited during collection",
                    proc.pid,
                )
        self._flush()

    def _collect_proc(self, proc):
        """Count proc under the first matching metric, or under "other"."""
        for metric in self._metrics:
            if metric.add(proc):
                break
        else:
            # No named metric claimed the process.
            self._other_metric.add(proc)

    def _flush(self):
        """Send and reset every metric, the catch-all one last."""
        for metric in self._metrics:
            metric.flush()
        self._other_metric.flush()
Allen Li6bb74d52017-06-22 14:44:53 -0700105
106
class _ProcessMetric(object):
    """Class for gathering process metrics.

    Accumulates a process count and a summed CPU percent for one named
    category of processes until flush() sends and resets them.
    """

    def __init__(self, process_name, test_func=lambda proc: True):
        """Initialize instance.

        Args:
            process_name: Used to identify the metric stream; sent as the
                "process_name" field of both metrics.
            test_func: Function called for each process.  If it returns a
                true value, the process is counted.  The default test is to
                count every process.
        """
        self._fields = {
            "process_name": process_name,
        }
        self._test_func = test_func
        self._count = 0
        self._cpu_percent = 0

    def add(self, proc):
        """Do metric collection for the given process.

        Args:
            proc: Process object; must provide cpu_percent() plus whatever
                the test function reads from it.

        Returns:
            True if the process was collected, False if the test function
            rejected it.
        """
        if not self._test_func(proc):
            return False
        # Sample the CPU usage before touching any counter: if the call
        # raises (e.g. the process just exited), the count and the CPU sum
        # both stay untouched instead of getting out of step.
        cpu_percent = proc.cpu_percent()
        self._count += 1
        self._cpu_percent += cpu_percent
        return True

    def flush(self):
        """Finish collection and send metrics.

        Each accumulator is reset right after its value has been sent, so the
        instance can be reused for the next collection round.
        """
        _count_metric.set(self._count, fields=self._fields)
        self._count = 0
        # The gauge is fed a whole number of percent.
        _cpu_percent_metric.set(
            int(round(self._cpu_percent)), fields=self._fields
        )
        self._cpu_percent = 0
Allen Li51bb6122017-06-21 12:04:13 -0700145
146
def _is_parent_autoserv(proc):
    """Return whether proc is a parent (not forked) autoserv process.

    Args:
        proc: Process object to inspect.

    Returns:
        True if proc is an autoserv process whose parent is not itself an
        autoserv process (or has no known parent), False otherwise.
    """
    if not _is_autoserv(proc):
        return False
    parent = proc.parent()
    # parent() yields None when no parent process is known; an autoserv
    # without a parent cannot be a forked child of another autoserv.
    return parent is None or not _is_autoserv(parent)
Allen Li51bb6122017-06-21 12:04:13 -0700150
151
def _is_autoserv(proc):
    """Return whether proc is an autoserv process.

    Detection is purely by process name, so it only works when the autoserv
    script is executed directly: the script has to be called exactly
    "autoserv" and its shebang has to be /usr/bin/python, NOT /bin/env.
    """
    expected_name = "autoserv"
    return _is_process_name(expected_name, proc)
Allen Li51bb6122017-06-21 12:04:13 -0700158
159
def _is_python_module(module, proc):
    """Return whether proc is a process running a Python module.

    A match is a command line whose first argument ends with "python" and is
    directly followed by "-m <module>"; further arguments are ignored.

    Args:
        module: Dotted module name expected after "-m".
        proc: Process object providing cmdline().

    Returns:
        True if proc runs the module, False otherwise.
    """
    cmdline = proc.cmdline()
    # bool() so that an empty command line yields False rather than the empty
    # list itself.
    return bool(
        cmdline
        and cmdline[0].endswith("python")
        and cmdline[1:3] == ["-m", module]
    )
Allen Li3992c662018-01-05 15:26:36 -0800168
169
def _is_process_name(name, proc):
    """Return whether process proc is named name.

    Args:
        name: Expected process name; compared for exact equality.
        proc: Process object providing name().
    """
    actual_name = proc.name()
    return actual_name == name
Congbin Guo17542e02022-06-29 13:48:15 -0700173
174
def _is_swarming_bot(proc):
    """Return whether proc is a Swarming bot.

    A swarming bot process is like '/usr/bin/python3.8 <bot-zip-path> start_bot':
    exactly three arguments, a python interpreter first and "start_bot" last.
    """
    argv = proc.cmdline()
    if len(argv) != 3:
        return False
    interpreter, _, action = argv
    # Only the last path component of the interpreter is looked at, so
    # versioned names such as python3.8 match as well.
    interpreter_name = interpreter.rsplit("/", 1)[-1]
    return interpreter_name.startswith("python") and action == "start_bot"
Congbin Guo17542e02022-06-29 13:48:15 -0700186
187
def _is_gsutil(proc):
    """Return whether proc is gsutil.

    A match is a command line that starts with exactly "python" followed by
    a script argument ending in "gsutil".
    """
    argv = proc.cmdline()
    if len(argv) < 2:
        return False
    return argv[0] == "python" and argv[1].endswith("gsutil")
Congbin Guo522cd982022-10-06 11:47:28 -0700196
197
def _is_tko_proxy(proc):
    """Return whether proc is a tko proxy.

    A tko proxy process is like
    '/opt/cloud_sql_proxy -dir=<...>
      -instances=google.com:chromeos-lab:us-central1:tko
      -credential_file=<...>'.
    """
    instances_arg = "-instances=google.com:chromeos-lab:us-central1:tko"
    argv = proc.cmdline()
    if len(argv) != 4:
        return False
    # Compare only the executable's file name, whatever directory it is in.
    binary = argv[0].rsplit("/", 1)[-1]
    return binary == "cloud_sql_proxy" and argv[2] == instances_arg
Congbin Guoa8432502023-01-23 20:31:01 -0800212
213
def _is_podman(subcmd, proc):
    """Return whether proc is a podman process running the given subcommand.

    A podman pull process is like
      'podman pull image:tag'
    A podman run process is like
      'podman run --option ... image:tag'

    Args:
        subcmd: Expected podman subcommand, i.e. the first argument after
            the executable (e.g. "pull" or "run").
        proc: Process object providing name() and cmdline().

    Returns:
        True if proc is named "podman" and its first argument is subcmd,
        False otherwise.
    """
    # Check the name first so the command line is only read for podman.
    if proc.name() != "podman":
        return False
    cmdline = proc.cmdline()
    # A podman process without any argument has no subcommand to compare.
    return len(cmdline) > 1 and cmdline[1] == subcmd