blob: 25719db0de880e0d48b0f1ebd849fb80f3bb84a6 [file] [log] [blame]
Mike Frysingerf1ba7ad2022-09-12 05:42:57 -04001# Copyright 2017 The ChromiumOS Authors
Allen Li51bb6122017-06-21 12:04:13 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Process metrics."""
6
7from __future__ import absolute_import
Allen Li51bb6122017-06-21 12:04:13 -07008
Allen Li3992c662018-01-05 15:26:36 -08009from functools import partial
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Li3992c662018-01-05 15:26:36 -080011
Mike Frysingercb56b642019-08-25 15:33:08 -040012import psutil # pylint: disable=import-error
Allen Li51bb6122017-06-21 12:04:13 -070013
Allen Lia9c6e802017-07-11 15:42:47 -070014from chromite.lib import metrics
Allen Li51bb6122017-06-21 12:04:13 -070015
Chris McDonald59650c32021-07-20 15:29:28 -060016
logger = logging.getLogger(__name__)

# Gauge streams written by _ProcessMetric.flush(). Each value is set together
# with a "process_name" field identifying the process group it belongs to.
_count_metric = metrics.GaugeMetric(
    "proc/count",
    description="Number of processes currently running.",
)
_thread_count_metric = metrics.GaugeMetric(
    "proc/thread_count",
    description="Number of threads currently running.",
)
_cpu_percent_metric = metrics.GaugeMetric(
    "proc/cpu_percent",
    description="CPU usage percent of processes.",
)
Allen Li51bb6122017-06-21 12:04:13 -070028
29
def collect_proc_info():
    """Collect metrics for the currently running processes and send them.

    A new collector is created on every call, so no counters are carried
    over from a previous invocation.
    """
    _ProcessMetricsCollector().collect()
Allen Li6bb74d52017-06-22 14:44:53 -070033
34
class _ProcessMetricsCollector(object):
    """Class for collecting process metrics.

    Each process is attributed to the first entry of an ordered list of named
    process groups whose test function accepts it. Processes accepted by
    none of them are counted under "other".
    """

    def __init__(self):
        # Order matters: _collect_proc() stops at the first metric that
        # accepts a process.
        self._metrics = [
            _ProcessMetric("autoserv", test_func=_is_parent_autoserv),
            _ProcessMetric(
                "cache-downloader",
                test_func=partial(_is_process_name, "downloader"),
            ),
            _ProcessMetric("cipd", test_func=partial(_is_process_name, "cipd")),
            _ProcessMetric(
                "common-tls", test_func=partial(_is_process_name, "common-tls")
            ),
            _ProcessMetric("curl", test_func=partial(_is_process_name, "curl")),
            _ProcessMetric(
                "dnsmasq", test_func=partial(_is_process_name, "dnsmasq")
            ),
            _ProcessMetric(
                "drone-agent",
                test_func=partial(_is_process_name, "drone-agent"),
            ),
            _ProcessMetric(
                "fleet-tlw", test_func=partial(_is_process_name, "fleet-tlw")
            ),
            _ProcessMetric(
                "getty", test_func=partial(_is_process_name, "getty")
            ),
            _ProcessMetric(
                "gs_offloader",
                test_func=partial(_is_process_name, "gs_offloader.py"),
            ),
            _ProcessMetric("gsutil", test_func=_is_gsutil),
            _ProcessMetric("java", test_func=partial(_is_process_name, "java")),
            _ProcessMetric(
                "labservice", test_func=partial(_is_process_name, "labservice")
            ),
            _ProcessMetric(
                "lxc-attach", test_func=partial(_is_process_name, "lxc-attach")
            ),
            _ProcessMetric(
                "lxc-start", test_func=partial(_is_process_name, "lxc-start")
            ),
            _ProcessMetric(
                "podman-pull", test_func=partial(_is_podman, "pull")
            ),
            _ProcessMetric("podman-run", test_func=partial(_is_podman, "run")),
            _ProcessMetric("sshd", test_func=partial(_is_process_name, "sshd")),
            _ProcessMetric("swarming_bot", test_func=_is_swarming_bot),
            _ProcessMetric(
                "sysmon",
                test_func=partial(_is_python_module, "chromite.scripts.sysmon"),
            ),
            _ProcessMetric("tko_proxy", test_func=_is_tko_proxy),
        ]
        # Catch-all bucket; its default test function accepts every process.
        self._other_metric = _ProcessMetric("other")

    def collect(self):
        """Collect metrics from all current processes, then send them."""
        for proc in psutil.process_iter():
            self._collect_proc(proc)
        self._flush()

    def _collect_proc(self, proc):
        """Attribute proc to the first matching metric, else to "other".

        A process may exit between being listed and being inspected, in which
        case psutil raises NoSuchProcess (ZombieProcess is a subclass). Such
        a process is skipped so that it cannot abort the whole collection and
        prevent the remaining metrics from being flushed.
        """
        try:
            for metric in self._metrics:
                if metric.add(proc):
                    return
            self._other_metric.add(proc)
        except psutil.NoSuchProcess:
            logger.debug(
                "Process %s went away during collection; skipping.", proc.pid
            )

    def _flush(self):
        """Send the totals of every metric, including "other"."""
        for metric in self._metrics:
            metric.flush()
        self._other_metric.flush()
Allen Li6bb74d52017-06-22 14:44:53 -0700108
109
class _ProcessMetric(object):
    """Class for gathering process metrics.

    Accumulates the process count, thread count and CPU percentage of the
    processes accepted by a test function, and reports the totals on flush().
    """

    def __init__(self, process_name, test_func=lambda proc: True):
        """Initialize instance.

        process_name is used to identify the metric stream.

        test_func is a function called
        for each process.  If it returns True, the process is counted.  The
        default test is to count every process.
        """
        self._fields = {
            "process_name": process_name,
        }
        self._test_func = test_func
        self._count = 0
        self._thread_count = 0
        self._cpu_percent = 0

    def add(self, proc):
        """Do metric collection for the given process.

        Returns True if the process was collected.
        """
        if not self._test_func(proc):
            return False
        # Query the process before touching any counter: if one of these
        # calls raises (e.g. because the process has just exited), the
        # process must not stay counted without its thread/CPU contribution.
        thread_count = proc.num_threads()
        cpu_percent = proc.cpu_percent()
        self._count += 1
        self._thread_count += thread_count
        self._cpu_percent += cpu_percent
        return True

    def flush(self):
        """Finish collection and send metrics.

        Every accumulator is reset to zero after its value has been sent.
        """
        _count_metric.set(self._count, fields=self._fields)
        self._count = 0

        _thread_count_metric.set(self._thread_count, fields=self._fields)
        self._thread_count = 0

        # The CPU total is accumulated as a float but reported as an integer.
        _cpu_percent_metric.set(
            int(round(self._cpu_percent)), fields=self._fields
        )
        self._cpu_percent = 0
Allen Li51bb6122017-06-21 12:04:13 -0700154
155
def _is_parent_autoserv(proc):
    """Return whether proc is a parent (not forked) autoserv process.

    psutil's Process.parent() returns None when no parent can be determined.
    An autoserv process in that state is treated as a parent instead of
    failing with AttributeError on the missing parent object.
    """
    if not _is_autoserv(proc):
        return False
    parent = proc.parent()
    return parent is None or not _is_autoserv(parent)
Allen Li51bb6122017-06-21 12:04:13 -0700159
160
def _is_autoserv(proc):
    """Return whether proc is an autoserv process."""
    # The check is on the process name, so it only works when the autoserv
    # script is run directly: the script must be named exactly "autoserv" and
    # its shebang must be /usr/bin/python, NOT /bin/env.
    return proc.name() == "autoserv"
Allen Li51bb6122017-06-21 12:04:13 -0700167
168
def _is_python_module(module, proc):
    """Return whether proc is a process running a Python module.

    Args:
        module: Module name expected right after "-m" on the command line.
        proc: Process object providing cmdline().

    Returns:
        True if the command line has the form "<...>python -m <module> ...",
        False otherwise (including for an empty command line).
    """
    cmdline = proc.cmdline()
    # bool() so that an empty command line yields False, not the empty list.
    return (
        bool(cmdline)
        and cmdline[0].endswith("python")
        and cmdline[1:3] == ["-m", module]
    )
Allen Li3992c662018-01-05 15:26:36 -0800177
178
def _is_process_name(name, proc):
    """Return whether the name reported by proc equals name exactly."""
    actual_name = proc.name()
    return actual_name == name
Congbin Guo17542e02022-06-29 13:48:15 -0700182
183
def _is_swarming_bot(proc):
    """Return whether proc is a Swarming bot.

    A swarming bot command line has exactly three arguments, e.g.
    '/usr/bin/python3.8 <bot-zip-path> start_bot'.
    """
    args = proc.cmdline()
    if len(args) != 3:
        return False
    # Only the file name of the interpreter matters, not its directory.
    interpreter = args[0].rsplit("/", 1)[-1]
    return interpreter.startswith("python") and args[2] == "start_bot"
Congbin Guo17542e02022-06-29 13:48:15 -0700195
196
def _is_gsutil(proc):
    """Return whether proc is gsutil.

    The command line must start with exactly "python", followed by a script
    path ending in "gsutil".
    """
    args = proc.cmdline()
    if len(args) < 2:
        return False
    interpreter, script = args[:2]
    return interpreter == "python" and script.endswith("gsutil")
Congbin Guo522cd982022-10-06 11:47:28 -0700205
206
def _is_tko_proxy(proc):
    """Return whether proc is a tko proxy.

    A tko proxy process is like
    '/opt/cloud_sql_proxy -dir=<...>
        -instances=google.com:chromeos-lab:us-central1:tko
        -credential_file=<...>'.
    """
    args = proc.cmdline()
    if len(args) != 4:
        return False
    # Compare the binary's file name only, so the install directory is free.
    binary = args[0].rsplit("/", 1)[-1]
    instances_flag = "-instances=google.com:chromeos-lab:us-central1:tko"
    return binary == "cloud_sql_proxy" and args[2] == instances_flag
Congbin Guoa8432502023-01-23 20:31:01 -0800221
222
def _is_podman(subcmd, proc):
    """Return whether proc is a podman process running subcmd.

    A podman pull process is like
        'podman pull image:tag'
    A podman run process is like
        'podman run --option ... image:tag'
    """
    args = proc.cmdline()
    if proc.name() != "podman":
        return False
    # The subcommand is the first argument after the executable.
    return len(args) > 1 and args[1] == subcmd