blob: b54f76508980a6445975525bb304da13a787b8f0 [file] [log] [blame]
Mike Frysingerf1ba7ad2022-09-12 05:42:57 -04001# Copyright 2017 The ChromiumOS Authors
Allen Li51bb6122017-06-21 12:04:13 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Process metrics."""
6
7from __future__ import absolute_import
Allen Li51bb6122017-06-21 12:04:13 -07008
Allen Li3992c662018-01-05 15:26:36 -08009from functools import partial
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Li3992c662018-01-05 15:26:36 -080011
Mike Frysingercb56b642019-08-25 15:33:08 -040012import psutil # pylint: disable=import-error
Allen Li51bb6122017-06-21 12:04:13 -070013
Allen Lia9c6e802017-07-11 15:42:47 -070014from chromite.lib import metrics
Allen Li51bb6122017-06-21 12:04:13 -070015
Chris McDonald59650c32021-07-20 15:29:28 -060016
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Gauge streams shared by every _ProcessMetric instance; individual streams
# are told apart by the "process_name" field passed when a value is set.
_count_metric = metrics.GaugeMetric(
    "proc/count",
    description="Number of processes currently running.",
)
_cpu_percent_metric = metrics.GaugeMetric(
    "proc/cpu_percent",
    description="CPU usage percent of processes.",
)
Allen Li51bb6122017-06-21 12:04:13 -070025
26
def collect_proc_info():
    """Run one pass of process metrics collection.

    A fresh collector is built on every call, so no counters carry over
    from one pass to the next.
    """
    _ProcessMetricsCollector().collect()
Allen Li6bb74d52017-06-22 14:44:53 -070030
31
class _ProcessMetricsCollector(object):
    """Sorts running processes into named buckets and reports each bucket.

    Every process is offered to the metrics in self._metrics in order; the
    first metric whose test accepts the process claims it.  A process that
    no metric accepts is counted under the catch-all "other" metric.
    """

    def __init__(self):
        """Build the ordered list of per-category metrics."""
        named = self._name_metric
        # Order matters: the first matching metric claims the process.
        self._metrics = [
            _ProcessMetric("autoserv", test_func=_is_parent_autoserv),
            named("common-tls"),
            named("curl"),
            named("dnsmasq"),
            named("drone-agent"),
            named("fleet-tlw"),
            named("getty"),
            named("gs_offloader", process_name="gs_offloader.py"),
            _ProcessMetric("gsutil", test_func=_is_gsutil),
            named("java"),
            named("labservice"),
            named("lxc-attach"),
            named("lxc-start"),
            named("sshd"),
            _ProcessMetric("swarming_bot", test_func=_is_swarming_bot),
            _ProcessMetric(
                "sysmon",
                test_func=partial(_is_python_module, "chromite.scripts.sysmon"),
            ),
            _ProcessMetric("tko_proxy", test_func=_is_tko_proxy),
        ]
        self._other_metric = _ProcessMetric("other")

    @staticmethod
    def _name_metric(metric_name, process_name=None):
        """Return a metric that matches processes by their exact name.

        Args:
            metric_name: Value reported in the metric's process_name field.
            process_name: Process name to match; defaults to metric_name.

        Returns:
            A _ProcessMetric testing processes with _is_process_name.
        """
        return _ProcessMetric(
            metric_name,
            test_func=partial(_is_process_name, process_name or metric_name),
        )

    def collect(self):
        """Inspect every running process, then send the metrics."""
        for proc in psutil.process_iter():
            try:
                self._collect_proc(proc)
            except psutil.NoSuchProcess:
                # The process exited between being listed and being
                # inspected.  That is routine on a busy host and must not
                # abort the pass, otherwise nothing would be flushed.
                continue
        self._flush()

    def _collect_proc(self, proc):
        """Count proc under the first metric that accepts it, else "other".

        Args:
            proc: The process object to classify.
        """
        for metric in self._metrics:
            if metric.add(proc):
                break
        else:
            # No specific metric claimed the process.
            self._other_metric.add(proc)

    def _flush(self):
        """Send every metric, including the catch-all one."""
        for metric in self._metrics:
            metric.flush()
        self._other_metric.flush()
Allen Li6bb74d52017-06-22 14:44:53 -070096
97
class _ProcessMetric(object):
    """Accumulates a process count and total CPU percent for one category.

    Processes are offered one at a time through add(); flush() reports the
    accumulated totals and resets them.
    """

    def __init__(self, process_name, test_func=lambda proc: True):
        """Initialize instance.

        Args:
            process_name: Identifies the metric stream; sent as the
                "process_name" field with every value.
            test_func: Called with each process.  If it returns a true
                value the process is counted.  The default counts every
                process.
        """
        self._fields = {"process_name": process_name}
        self._test_func = test_func
        self._count = 0
        self._cpu_percent = 0

    def add(self, proc):
        """Do metric collection for the given process.

        Args:
            proc: Process object providing cpu_percent().

        Returns:
            True if the process was collected, False if the test function
            rejected it.
        """
        if not self._test_func(proc):
            return False
        # Read the CPU figure before touching any counter, so that a
        # failing read cannot leave a counted process without its CPU share.
        cpu_percent = proc.cpu_percent()
        self._count += 1
        self._cpu_percent += cpu_percent
        return True

    def flush(self):
        """Finish collection and send metrics, then reset the totals."""
        _count_metric.set(self._count, fields=self._fields)
        # The summed CPU percent is reported as a rounded integer.
        _cpu_percent_metric.set(
            int(round(self._cpu_percent)), fields=self._fields
        )
        # Reset only after both values were sent.
        self._count = 0
        self._cpu_percent = 0
Allen Li51bb6122017-06-21 12:04:13 -0700136
137
def _is_parent_autoserv(proc):
    """Return whether proc is a parent (not forked) autoserv process.

    Args:
        proc: Process object providing parent() and whatever _is_autoserv
            needs.

    Returns:
        True if proc is an autoserv process whose parent is not itself an
        autoserv process, or whose parent cannot be determined; False
        otherwise.
    """
    if not _is_autoserv(proc):
        return False
    parent = proc.parent()
    # psutil's Process.parent() may return None (for example when the parent
    # is already gone).  With no autoserv parent to point at, proc itself is
    # the top-level autoserv.
    if parent is None:
        return True
    return not _is_autoserv(parent)
Allen Li51bb6122017-06-21 12:04:13 -0700141
142
def _is_autoserv(proc):
    """Tell whether proc is an autoserv process, judged by process name.

    Args:
        proc: Process object to examine.

    Returns:
        Whatever _is_process_name reports for the name "autoserv".
    """
    # Name matching only works when the autoserv script is executed
    # directly: the script has to be called exactly "autoserv" and its
    # shebang has to be /usr/bin/python rather than /bin/env.
    expected_name = "autoserv"
    return _is_process_name(expected_name, proc)
Allen Li51bb6122017-06-21 12:04:13 -0700149
150
def _is_python_module(module, proc):
    """Return whether proc is a process running a Python module.

    A match looks like '<python interpreter> -m <module> [args...]'.

    Args:
        module: Dotted module name expected after the -m flag.
        proc: Process object providing cmdline().

    Returns:
        True if the command line runs the module with a Python interpreter,
        False otherwise (including for an empty command line).
    """
    cmdline = proc.cmdline()
    if not cmdline:
        return False
    interpreter = cmdline[0]
    # Accept versioned interpreters such as python3 or /usr/bin/python3.8.
    # The plain suffix test is kept so that every command line that matched
    # before still matches.
    runs_python = interpreter.endswith("python") or interpreter.split("/")[
        -1
    ].startswith("python")
    return runs_python and cmdline[1:3] == ["-m", module]
Allen Li3992c662018-01-05 15:26:36 -0800159
160
def _is_process_name(name, proc):
    """Tell whether the process reports exactly the given name.

    Args:
        name: The process name to look for.
        proc: Process object providing name().

    Returns:
        True if proc.name() equals name, False otherwise.
    """
    reported = proc.name()
    return reported == name
Congbin Guo17542e02022-06-29 13:48:15 -0700164
165
def _is_swarming_bot(proc):
    """Tell whether proc looks like a Swarming bot.

    The expected command line has exactly three arguments, for example
    '/usr/bin/python3.8 <bot-zip-path> start_bot'.

    Args:
        proc: Process object providing cmdline().

    Returns:
        True if the command line has that shape, False otherwise.
    """
    argv = proc.cmdline()
    if len(argv) != 3:
        return False
    interpreter, _, action = argv
    # Only the final path component of the interpreter is checked.
    binary = interpreter.rpartition("/")[2]
    return binary.startswith("python") and action == "start_bot"
Congbin Guo17542e02022-06-29 13:48:15 -0700177
178
def _is_gsutil(proc):
    """Return whether proc is gsutil.

    A match looks like '<python interpreter> <path ending in gsutil> ...'.

    Args:
        proc: Process object providing cmdline().

    Returns:
        True if a Python interpreter is running a script whose path ends
        with "gsutil", False otherwise.
    """
    cmdline = proc.cmdline()
    if len(cmdline) < 2:
        return False
    # Match on the interpreter's base name so that python3 or
    # /usr/bin/python launches are recognised too; a bare "python" still
    # matches as before.
    interpreter = cmdline[0].split("/")[-1]
    return interpreter.startswith("python") and cmdline[1].endswith("gsutil")
Congbin Guo522cd982022-10-06 11:47:28 -0700187
188
def _is_tko_proxy(proc):
    """Tell whether proc is a tko proxy.

    A tko proxy process is like
    '/opt/cloud_sql_proxy -dir=<...>
        -instances=google.com:chromeos-lab:us-central1:tko
        -credential_file=<...>'.

    Args:
        proc: Process object providing cmdline().

    Returns:
        True if the command line has exactly four arguments, the binary is
        named cloud_sql_proxy and the third argument selects the tko
        instance; False otherwise.
    """
    argv = proc.cmdline()
    if len(argv) != 4:
        return False
    # Only the final path component of the binary is checked.
    binary = argv[0].rpartition("/")[2]
    wanted_instances = "-instances=google.com:chromeos-lab:us-central1:tko"
    return binary == "cloud_sql_proxy" and argv[2] == wanted_instances