blob: b88b8629d24558d0e672a6dbb3a8ee6994615f3c [file] [log] [blame]
Allen Liec5beb32016-09-08 15:31:41 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Allen Liec5beb32016-09-08 15:31:41 -07005"""System metrics."""
6
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008
9import errno
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Liec5beb32016-09-08 15:31:41 -070011import os
Allen Liec5beb32016-09-08 15:31:41 -070012import time
13
Mike Frysingercb56b642019-08-25 15:33:08 -040014import psutil # pylint: disable=import-error
Allen Liec5beb32016-09-08 15:31:41 -070015
Allen Lia9c6e802017-07-11 15:42:47 -070016from chromite.lib import metrics
Allen Liec5beb32016-09-08 15:31:41 -070017
Chris McDonald59650c32021-07-20 15:29:28 -060018
# Module-scoped logger, named after this module.
logger = logging.getLogger(__name__)


# --- CPU -------------------------------------------------------------------
_cpu_count_metric = metrics.GaugeMetric(
    "dev/cpu/count",
    description="Number of CPU cores.",
)
_cpu_time_metric = metrics.FloatMetric(
    "dev/cpu/time",
    description="percentage of time spent by the CPU in different states.",
)

# --- Disk space, reported per mount point ----------------------------------
_disk_free_metric = metrics.GaugeMetric(
    "dev/disk/free",
    description="Available bytes on disk partition.",
)
_disk_total_metric = metrics.GaugeMetric(
    "dev/disk/total",
    description="Total bytes on disk partition.",
)

# --- Inodes, reported per mount point ---------------------------------------
_inodes_free_metric = metrics.GaugeMetric(
    "dev/inodes/free",
    description="Number of available inodes on disk partition (unix only).",
)
_inodes_total_metric = metrics.GaugeMetric(
    "dev/inodes/total",
    description="Number of possible inodes on disk partition (unix only)",
)

# --- Memory ------------------------------------------------------------------
_mem_free_metric = metrics.GaugeMetric(
    "dev/mem/free",
    description=(
        "Amount of memory available to a process (in Bytes). "
        "Buffers are considered free memory."
    ),
)
_mem_total_metric = metrics.GaugeMetric(
    "dev/mem/total",
    description="Total physical memory in Bytes.",
)
Allen Liec5beb32016-09-08 15:31:41 -070056
# Boot timestamp, read once at import time. It is the start time of the
# cumulative disk I/O counters below and the reference point for uptime.
_BOOT_TIME = psutil.boot_time()

# --- Disk I/O, reported per disk --------------------------------------------
_disk_read_metric = metrics.CounterMetric(
    "dev/disk/read",
    description="Number of Bytes read on disk.",
    start_time=_BOOT_TIME,
)
_disk_write_metric = metrics.CounterMetric(
    "dev/disk/write",
    description="Number of Bytes written on disk.",
    start_time=_BOOT_TIME,
)

# --- Uptime and load ----------------------------------------------------------
_uptime_metric = metrics.GaugeMetric(
    "dev/uptime",
    description="Machine uptime, in seconds.",
)
_load_average_metric = metrics.FloatMetric(
    "dev/proc/load_average",
    description="Number of processes currently in the system run queue.",
)
Allen Liec5beb32016-09-08 15:31:41 -070078
# The ts_mon pipeline stamps metric points using backend clocks. Reporting the
# machine's own clock as the point *value* lets the two be compared, which can
# reveal anomalies such as clock drift, an unusually slow metrics pipeline, or
# a completely wrong local clock.
#
# For that comparison to be meaningful, this metric must be gathered right
# before the flush.
_unix_time_metric = metrics.GaugeMetric(
    "dev/unix_time",
    description=(
        "Number of milliseconds since epoch based on local machine clock."
    ),
)

# --- Static platform description ---------------------------------------------
_os_name_metric = metrics.StringMetric(
    "proc/os/name",
    description="OS name on the machine",
)
_os_version_metric = metrics.StringMetric(
    "proc/os/version",
    description="OS version on the machine",
)
_os_arch_metric = metrics.StringMetric(
    "proc/os/arch",
    description="OS architecture on this machine",
)
_python_arch_metric = metrics.StringMetric(
    "proc/python/arch",
    description="python userland architecture on this machine",
)
Allen Liec5beb32016-09-08 15:31:41 -0700107
108
def collect_uptime():
    """Set the uptime gauge to the whole seconds elapsed since boot.

    Uptime is the current wall-clock time minus the boot timestamp captured
    when this module was imported, truncated to an integer.
    """
    seconds_since_boot = time.time() - _BOOT_TIME
    _uptime_metric.set(int(seconds_since_boot))
Allen Liec5beb32016-09-08 15:31:41 -0700111
112
def collect_cpu_info():
    """Report the CPU core count and the CPU time split by mode.

    The time metric is set once for each of the "user", "system" and "idle"
    modes, with the mode name attached as the "mode" field.
    """
    core_count = psutil.cpu_count()
    _cpu_count_metric.set(core_count)

    # No interval is passed, so this call does not block. Per the psutil
    # documentation, the percentages then cover the time since the previous
    # call.
    cpu_percentages = psutil.cpu_times_percent()
    for state in ("user", "system", "idle"):
        share = getattr(cpu_percentages, state)
        _cpu_time_metric.set(share, {"mode": state})
Allen Liec5beb32016-09-08 15:31:41 -0700119
120
def collect_disk_info(mountpoints=None):
    """Report disk usage, inode and disk I/O metrics.

    Args:
        mountpoints: Iterable of mount point paths to report on. When None,
            the mount point of every partition returned by
            psutil.disk_partitions() is used.

    Raises:
        OSError: Propagated from the per-mount-point collection for any
            error other than ENOENT.
        RuntimeError: Propagated from the disk I/O collection for any error
            other than the "no physical disk" case it tolerates.
    """
    if mountpoints is None:
        mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
    for mountpoint in mountpoints:
        # _collect_disk_info_single() already reports inode counts, but only
        # on POSIX and only once the mount point is known to be readable.
        # Calling _collect_fs_inode_info() again here would duplicate that
        # report and bypass both guards (os.statvfs is POSIX-only and raises
        # for a missing mount point).
        _collect_disk_info_single(mountpoint)
    _collect_disk_io_info()
Allen Liec5beb32016-09-08 15:31:41 -0700128
Allen Liec5beb32016-09-08 15:31:41 -0700129
def _collect_disk_info_single(mountpoint):
    """Report free/total bytes (and, on POSIX, inodes) for one mount point.

    Args:
        mountpoint: Path of the mount point to inspect; also used as the
            "path" field on the reported metrics.

    Raises:
        OSError: If querying disk usage fails with any errno other than
            ENOENT.
    """
    try:
        usage = psutil.disk_usage(mountpoint)
    except OSError as ex:
        if ex.errno != errno.ENOENT:
            raise
        # ENOENT is expected on Windows for a removable drive that has no
        # media inserted right now; report nothing for it.
        return

    labels = {"path": mountpoint}
    _disk_free_metric.set(usage.free, fields=labels)
    _disk_total_metric.set(usage.total, fields=labels)

    # Inode counts only exist on Unix.
    if os.name == "posix":
        _collect_fs_inode_info(mountpoint)
Allen Lia6b02252016-10-26 14:40:51 -0700149
150
def _collect_fs_inode_info(mountpoint):
    """Report available and total inode counts for one mount point.

    Args:
        mountpoint: Path handed to os.statvfs(); also used as the "path"
            field on the reported metrics.
    """
    vfs = os.statvfs(mountpoint)
    labels = {"path": mountpoint}
    _inodes_free_metric.set(vfs.f_favail, fields=labels)
    _inodes_total_metric.set(vfs.f_files, fields=labels)
Allen Lia6b02252016-10-26 14:40:51 -0700156
157
def _collect_disk_io_info():
    """Report cumulative bytes read and written for every disk.

    Each disk's counters are reported with the disk name as the "disk" field.

    Raises:
        RuntimeError: If psutil fails for a reason other than not finding
            any physical disk.
    """
    try:
        # pylint: disable=dict-items-not-iterating
        per_disk = psutil.disk_io_counters(perdisk=True).items()
    except RuntimeError as ex:
        if "couldn't find any physical disk" not in str(ex):
            raise
        # Disk performance counters aren't enabled on Windows; nothing to
        # report.
        return

    for name, io in per_disk:
        labels = {"disk": name}
        _disk_read_metric.set(io.read_bytes, fields=labels)
        _disk_write_metric.set(io.write_bytes, fields=labels)
Allen Liec5beb32016-09-08 15:31:41 -0700173
174
def collect_mem_info():
    """Report available and total memory from psutil.virtual_memory().

    Used memory is intentionally left out: because of virtual memory it is
    not a useful number.
    """
    memory = psutil.virtual_memory()
    available_bytes = memory.available
    _mem_free_metric.set(available_bytes)
    total_bytes = memory.total
    _mem_total_metric.set(total_bytes)
Allen Liec5beb32016-09-08 15:31:41 -0700181
182
def collect_load_avg():
    """Report the 1, 5 and 15 minute load averages.

    Each value is reported with its window length as the "minutes" field.
    If os.getloadavg() raises OSError, nothing is reported.
    """
    try:
        averages = os.getloadavg()
    except OSError:
        # Best effort: skip this round when the load average is unavailable.
        return

    # os.getloadavg() returns the averages in (1, 5, 15) minute order.
    for minutes, value in zip((1, 5, 15), averages):
        _load_average_metric.set(value, fields={"minutes": minutes})
Allen Liec5beb32016-09-08 15:31:41 -0700192
193
def collect_unix_time():
    """Report the local clock as whole milliseconds since the epoch."""
    now_ms = int(time.time() * 1000)
    _unix_time_metric.set(now_ms)