blob: a346277e92707487c628345480c975ce07bd7aca [file] [log] [blame]
# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Process metrics."""
6
7from __future__ import absolute_import
Allen Li51bb6122017-06-21 12:04:13 -07008
Allen Li3992c662018-01-05 15:26:36 -08009from functools import partial
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Li3992c662018-01-05 15:26:36 -080011
Mike Frysingercb56b642019-08-25 15:33:08 -040012import psutil # pylint: disable=import-error
Allen Li51bb6122017-06-21 12:04:13 -070013
Allen Lia9c6e802017-07-11 15:42:47 -070014from chromite.lib import metrics
Allen Li51bb6122017-06-21 12:04:13 -070015
Chris McDonald59650c32021-07-20 15:29:28 -060016
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Gauge that receives the number of processes counted for a metric stream.
_count_metric = metrics.GaugeMetric(
    'proc/count',
    description='Number of processes currently running.',
)
# Gauge that receives the summed CPU usage percent for a metric stream.
_cpu_percent_metric = metrics.GaugeMetric(
    'proc/cpu_percent',
    description='CPU usage percent of processes.',
)
Allen Li51bb6122017-06-21 12:04:13 -070025
26
def collect_proc_info():
    """Run one round of process metrics collection and send the results."""
    _ProcessMetricsCollector().collect()
30
31
class _ProcessMetricsCollector(object):
    """Class for collecting process metrics.

    Each process yielded by psutil.process_iter() is offered to the entries of
    self._metrics in order and is counted by the first one that accepts it.
    Processes accepted by none of them are counted by the 'other' metric.
    """

    def __init__(self):
        # Order matters: a process is counted by the first matching entry only.
        self._metrics = [
            _ProcessMetric('autoserv',
                           test_func=_is_parent_autoserv),
            _ProcessMetric('curl',
                           test_func=partial(_is_process_name, 'curl')),
            _ProcessMetric('getty',
                           test_func=partial(_is_process_name, 'getty')),
            _ProcessMetric('gs_archive_server',
                           test_func=partial(_is_python_module,
                                             'gs_archive_server')),
            _ProcessMetric('gs_offloader',
                           test_func=partial(_is_process_name,
                                             'gs_offloader.py')),
            _ProcessMetric('gsutil',
                           test_func=_is_gsutil),
            _ProcessMetric('java',
                           test_func=partial(_is_process_name, 'java')),
            _ProcessMetric('lxc-attach',
                           test_func=partial(_is_process_name, 'lxc-attach')),
            _ProcessMetric('lxc-start',
                           test_func=partial(_is_process_name, 'lxc-start')),
            _ProcessMetric('sshd',
                           test_func=partial(_is_process_name, 'sshd')),
            _ProcessMetric('swarming_bot',
                           test_func=_is_swarming_bot),
            _ProcessMetric('sysmon',
                           test_func=partial(_is_python_module,
                                             'chromite.scripts.sysmon')),
        ]
        # Catch-all: its default test function accepts every process.
        self._other_metric = _ProcessMetric('other')

    def collect(self):
        """Scan all running processes, then send the accumulated metrics."""
        for proc in psutil.process_iter():
            self._collect_proc(proc)
        self._flush()

    def _collect_proc(self, proc):
        """Count proc in the first matching metric, or in 'other'.

        A process can exit between being listed and being inspected, in which
        case psutil raises NoSuchProcess. Such a process is skipped so that
        one short-lived process does not abort the whole scan.
        """
        try:
            for metric in self._metrics:
                if metric.add(proc):
                    break
            else:
                # No specific metric accepted the process.
                self._other_metric.add(proc)
        except psutil.NoSuchProcess:
            logger.debug('Process %s exited during collection; skipping.',
                         proc.pid)

    def _flush(self):
        """Send every metric, the catch-all 'other' metric last."""
        for metric in self._metrics:
            metric.flush()
        self._other_metric.flush()
82
83
84class _ProcessMetric(object):
85 """Class for gathering process metrics."""
86
87 def __init__(self, process_name, test_func=lambda proc: True):
88 """Initialize instance.
89
90 process_name is used to identify the metric stream.
91
92 test_func is a function called
93 for each process. If it returns True, the process is counted. The
94 default test is to count every process.
95 """
96 self._fields = {
Allen Li22989bd2017-07-12 10:34:37 -070097 'process_name': process_name,
Allen Li6bb74d52017-06-22 14:44:53 -070098 }
99 self._test_func = test_func
100 self._count = 0
101 self._cpu_percent = 0
102
103 def add(self, proc):
104 """Do metric collection for the given process.
105
106 Returns True if the process was collected.
107 """
108 if not self._test_func(proc):
109 return False
110 self._count += 1
111 self._cpu_percent += proc.cpu_percent()
112 return True
113
114 def flush(self):
115 """Finish collection and send metrics."""
116 _count_metric.set(self._count, fields=self._fields)
117 self._count = 0
Aviv Keshet0b634e92017-07-14 14:29:48 -0700118 _cpu_percent_metric.set(int(round(self._cpu_percent)), fields=self._fields)
Allen Li6bb74d52017-06-22 14:44:53 -0700119 self._cpu_percent = 0
Allen Li51bb6122017-06-21 12:04:13 -0700120
121
def _is_parent_autoserv(proc):
    """Return whether proc is a parent (not forked) autoserv process.

    A process qualifies when it is an autoserv process and its parent is not
    one.
    """
    if not _is_autoserv(proc):
        return False
    parent = proc.parent()
    # psutil's Process.parent() returns None when no parent is known. Such a
    # process cannot have been forked from another autoserv, so it counts as
    # a parent autoserv.
    return parent is None or not _is_autoserv(parent)
125
126
def _is_autoserv(proc):
    """Return whether proc is an autoserv process."""
    # Matching by process name only works when the autoserv script is run
    # directly: the script must be named exactly "autoserv" and its shebang
    # must be /usr/bin/python, NOT /bin/env.
    return _is_process_name(name='autoserv', proc=proc)
Allen Li51bb6122017-06-21 12:04:13 -0700133
134
Allen Li3992c662018-01-05 15:26:36 -0800135def _is_python_module(module, proc):
136 """Return whether proc is a process running a Python module."""
Aviv Keshet70a91c52017-07-17 16:09:09 -0700137 cmdline = proc.cmdline()
138 return (cmdline and
139 cmdline[0].endswith('python') and
Allen Li3992c662018-01-05 15:26:36 -0800140 cmdline[1:3] == ['-m', module])
141
142
Prathmesh Prabhu0b795f02018-05-07 13:12:37 -0700143def _is_process_name(name, proc):
144 """Return whether process proc is named name."""
145 return proc.name() == name
Congbin Guo17542e02022-06-29 13:48:15 -0700146
147
148def _is_swarming_bot(proc):
149 """Return whether proc is a Swarming bot.
150
151 A swarming bot process is like '/usr/bin/python3.8 <bot-zip-path> start_bot'.
152 """
153 cmdline = proc.cmdline()
154 return (len(cmdline) == 3 and
155 cmdline[0].split('/')[-1].startswith('python') and
156 cmdline[2] == 'start_bot')
157
158
159def _is_gsutil(proc):
160 """Return whether proc is gsutil."""
161 cmdline = proc.cmdline()
162 return (len(cmdline) >= 2 and
163 cmdline[0] == 'python' and
164 cmdline[1].endswith('gsutil'))