blob: 2cafde5af6afab53e6b4f5bfad1ee34c96d9dfb9 [file] [log] [blame]
Allen Liec5beb32016-09-08 15:31:41 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Allen Liec5beb32016-09-08 15:31:41 -07005"""System metrics."""
6
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008
9import errno
Chris McDonald59650c32021-07-20 15:29:28 -060010import logging
Allen Liec5beb32016-09-08 15:31:41 -070011import os
Allen Liec5beb32016-09-08 15:31:41 -070012import time
13
Mike Frysingercb56b642019-08-25 15:33:08 -040014import psutil # pylint: disable=import-error
Allen Liec5beb32016-09-08 15:31:41 -070015
Allen Lia9c6e802017-07-11 15:42:47 -070016from chromite.lib import metrics
Allen Liec5beb32016-09-08 15:31:41 -070017
Chris McDonald59650c32021-07-20 15:29:28 -060018
logger = logging.getLogger(__name__)


# CPU: core count and the percentage of time spent per CPU state.
_cpu_count_metric = metrics.GaugeMetric(
    'dev/cpu/count', description='Number of CPU cores.')
_cpu_time_metric = metrics.FloatMetric(
    'dev/cpu/time',
    description=('percentage of time spent by the CPU '
                 'in different states.'))

# Disk space: free and total bytes.
_disk_free_metric = metrics.GaugeMetric(
    'dev/disk/free', description='Available bytes on disk partition.')
_disk_total_metric = metrics.GaugeMetric(
    'dev/disk/total', description='Total bytes on disk partition.')

# Inodes: free and total counts.
_inodes_free_metric = metrics.GaugeMetric(
    'dev/inodes/free',
    description=('Number of available inodes on '
                 'disk partition (unix only).'))
_inodes_total_metric = metrics.GaugeMetric(
    'dev/inodes/total',
    description=('Number of possible inodes on '
                 'disk partition (unix only)'))

# Memory: available and total physical bytes.
_mem_free_metric = metrics.GaugeMetric(
    'dev/mem/free',
    description=('Amount of memory available to a '
                 'process (in Bytes). Buffers are considered '
                 'free memory.'))
_mem_total_metric = metrics.GaugeMetric(
    'dev/mem/total', description='Total physical memory in Bytes.')
Allen Liec5beb32016-09-08 15:31:41 -070055
# Boot timestamp, read once at import time.  It is the start time of the
# cumulative disk I/O counters below and the baseline for the uptime gauge.
_BOOT_TIME = psutil.boot_time()

_disk_read_metric = metrics.CounterMetric(
    'dev/disk/read',
    start_time=_BOOT_TIME,
    description='Number of Bytes read on disk.')
_disk_write_metric = metrics.CounterMetric(
    'dev/disk/write',
    start_time=_BOOT_TIME,
    description='Number of Bytes written on disk.')
Allen Liec5beb32016-09-08 15:31:41 -070064
_uptime_metric = metrics.GaugeMetric(
    'dev/uptime', description='Machine uptime, in seconds.')

_load_average_metric = metrics.FloatMetric(
    'dev/proc/load_average',
    description=('Number of processes currently '
                 'in the system run queue.'))

# The ts_mon pipeline assigns timestamps to metric points using backend
# clocks.  Comparing a point's timestamp with its value (the time according
# to this machine's local clock) can reveal anomalies such as clock drift,
# unusually high metrics pipeline delay, or completely wrong clocks.
#
# It is important to gather this metric right before the flush.
_unix_time_metric = metrics.GaugeMetric(
    'dev/unix_time',
    description=('Number of milliseconds since epoch'
                 ' based on local machine clock.'))
Allen Liec5beb32016-09-08 15:31:41 -070084
# String metrics describing the operating system and the Python userland.
_os_name_metric = metrics.StringMetric(
    'proc/os/name', description='OS name on the machine')
_os_version_metric = metrics.StringMetric(
    'proc/os/version', description='OS version on the machine')
_os_arch_metric = metrics.StringMetric(
    'proc/os/arch', description='OS architecture on this machine')
_python_arch_metric = metrics.StringMetric(
    'proc/python/arch',
    description=('python userland '
                 'architecture on this machine'))
Allen Liec5beb32016-09-08 15:31:41 -0700101
102
def collect_uptime():
    """Set the uptime gauge to whole seconds elapsed since _BOOT_TIME."""
    elapsed = time.time() - _BOOT_TIME
    _uptime_metric.set(int(elapsed))
Allen Liec5beb32016-09-08 15:31:41 -0700105
106
def collect_cpu_info():
    """Report the CPU count and the user/system/idle time percentages."""
    _cpu_count_metric.set(psutil.cpu_count())

    percentages = psutil.cpu_times_percent()
    for mode in ('user', 'system', 'idle'):
        share = getattr(percentages, mode)
        # One point per CPU state, distinguished by the 'mode' field.
        _cpu_time_metric.set(share, {'mode': mode})
Allen Liec5beb32016-09-08 15:31:41 -0700113
114
def collect_disk_info(mountpoints=None):
    """Report disk usage for each mount point, then disk I/O counters.

    Args:
      mountpoints: Iterable of mount point paths to report on.  When None,
        the mount point of every partition returned by
        psutil.disk_partitions() is used.
    """
    if mountpoints is None:
        mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
    for mountpoint in mountpoints:
        # _collect_disk_info_single() already reports inode counts behind
        # an os.name == 'posix' check.  Repeating that call here unguarded
        # would report the same gauges twice on POSIX and would reach
        # os.statvfs on platforms that do not provide it.
        _collect_disk_info_single(mountpoint)
    _collect_disk_io_info()
Allen Liec5beb32016-09-08 15:31:41 -0700122
Allen Liec5beb32016-09-08 15:31:41 -0700123
def _collect_disk_info_single(mountpoint):
    """Report free/total bytes (and, on POSIX, inode counts) for one path.

    Args:
      mountpoint: Path of the mount point to query.

    Raises:
      OSError: If psutil.disk_usage() fails with anything except ENOENT.
    """
    labels = {'path': mountpoint}

    try:
        usage = psutil.disk_usage(mountpoint)
    except OSError as ex:
        # ENOENT happens on Windows when querying a removable drive that
        # doesn't have any media inserted right now; skip the usage gauges.
        if ex.errno != errno.ENOENT:
            raise
        usage = None

    if usage is not None:
        _disk_free_metric.set(usage.free, fields=labels)
        _disk_total_metric.set(usage.total, fields=labels)

    # inode counts are only available on Unix.
    # NOTE(review): this still runs after a tolerated ENOENT above, so on
    # POSIX a missing path is queried again via os.statvfs -- confirm
    # whether that is intended.
    if os.name == 'posix':
        _collect_fs_inode_info(mountpoint)
Allen Lia6b02252016-10-26 14:40:51 -0700143
144
def _collect_fs_inode_info(mountpoint):
    """Report free and total inode counts for one path via os.statvfs().

    Args:
      mountpoint: Path of the mount point to query.
    """
    vfs = os.statvfs(mountpoint)
    labels = {'path': mountpoint}
    _inodes_free_metric.set(vfs.f_favail, fields=labels)
    _inodes_total_metric.set(vfs.f_files, fields=labels)
150
151
def _collect_disk_io_info():
    """Report cumulative bytes read and written for each disk.

    Raises:
      RuntimeError: If psutil.disk_io_counters() fails for any reason other
        than not finding a physical disk.
    """
    try:
        per_disk = psutil.disk_io_counters(perdisk=True)
    except RuntimeError as ex:
        if "couldn't find any physical disk" not in str(ex):
            raise
        # Disk performance counters aren't enabled on Windows.
        return

    for disk_name, io_stats in per_disk.items():
        labels = {'disk': disk_name}
        _disk_read_metric.set(io_stats.read_bytes, fields=labels)
        _disk_write_metric.set(io_stats.write_bytes, fields=labels)
Allen Liec5beb32016-09-08 15:31:41 -0700167
168
def collect_mem_info():
    """Report available and total memory from psutil.virtual_memory()."""
    # mem.used is deliberately not reported: because of virtual memory it
    # is not a useful figure.
    vm_stats = psutil.virtual_memory()
    available_bytes = vm_stats.available
    total_bytes = vm_stats.total
    _mem_free_metric.set(available_bytes)
    _mem_total_metric.set(total_bytes)
Allen Liec5beb32016-09-08 15:31:41 -0700175
176
def collect_load_avg():
    """Report the 1, 5 and 15 minute load averages.

    Nothing is reported when os.getloadavg() raises OSError.
    """
    try:
        one_min, five_min, fifteen_min = os.getloadavg()
    except OSError:
        return

    windows = ((1, one_min), (5, five_min), (15, fifteen_min))
    for minutes, load in windows:
        _load_average_metric.set(load, fields={'minutes': minutes})
Allen Liec5beb32016-09-08 15:31:41 -0700186
187
def collect_unix_time():
    """Set the unix_time gauge to the local clock in whole milliseconds."""
    now_ms = int(time.time() * 1000)
    _unix_time_metric.set(now_ms)