blob: f27cfcd9ada605363f6e0fe5de0c35a8c95286e6 [file] [log] [blame]
Mike Frysingere58c0e22017-10-04 15:43:30 -04001# -*- coding: utf-8 -*-
Allen Liec5beb32016-09-08 15:31:41 -07002# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Allen Liec5beb32016-09-08 15:31:41 -07006"""System metrics."""
7
Allen Li13bdf0c2017-03-02 15:18:16 -08008from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07009from __future__ import print_function
10
11import errno
12import os
Allen Liec5beb32016-09-08 15:31:41 -070013import time
14
Mike Frysingercb56b642019-08-25 15:33:08 -040015import psutil # pylint: disable=import-error
Allen Liec5beb32016-09-08 15:31:41 -070016
17from chromite.lib import cros_logging as logging
Allen Lia9c6e802017-07-11 15:42:47 -070018from chromite.lib import metrics
Allen Liec5beb32016-09-08 15:31:41 -070019
Allen Li79317bb2016-12-16 18:25:07 -080020logger = logging.getLogger(__name__)
21
Allen Liec5beb32016-09-08 15:31:41 -070022
# CPU metrics.
_cpu_count_metric = metrics.GaugeMetric(
    'dev/cpu/count',
    description='Number of CPU cores.')
_cpu_time_metric = metrics.FloatMetric(
    'dev/cpu/time',
    description='percentage of time spent by the CPU in different states.')

# Disk space metrics.
_disk_free_metric = metrics.GaugeMetric(
    'dev/disk/free',
    description='Available bytes on disk partition.')
_disk_total_metric = metrics.GaugeMetric(
    'dev/disk/total',
    description='Total bytes on disk partition.')

# Inode metrics.
_inodes_free_metric = metrics.GaugeMetric(
    'dev/inodes/free',
    description='Number of available inodes on disk partition (unix only).')
_inodes_total_metric = metrics.GaugeMetric(
    'dev/inodes/total',
    description='Number of possible inodes on disk partition (unix only)')

# Memory metrics.
_mem_free_metric = metrics.GaugeMetric(
    'dev/mem/free',
    description='Amount of memory available to a process (in Bytes). '
                'Buffers are considered free memory.')

_mem_total_metric = metrics.GaugeMetric(
    'dev/mem/total',
    description='Total physical memory in Bytes.')
Allen Liec5beb32016-09-08 15:31:41 -070056
# Boot timestamp, sampled once at import time.  It is the start time of the
# cumulative disk counters below and the reference point for uptime.
_BOOT_TIME = psutil.boot_time()

# Disk I/O counters.
_disk_read_metric = metrics.CounterMetric(
    'dev/disk/read',
    start_time=_BOOT_TIME,
    description='Number of Bytes read on disk.')
_disk_write_metric = metrics.CounterMetric(
    'dev/disk/write',
    start_time=_BOOT_TIME,
    description='Number of Bytes written on disk.')

_uptime_metric = metrics.GaugeMetric(
    'dev/uptime',
    description='Machine uptime, in seconds.')

_load_average_metric = metrics.FloatMetric(
    'dev/proc/load_average',
    description='Number of processes currently in the system run queue.')
Allen Liec5beb32016-09-08 15:31:41 -070074
# The ts_mon pipeline assigns timestamps to metric points using backend
# clocks.  Comparing a point's timestamp with its value (i.e. the time by this
# machine's local clock) can potentially reveal anomalies such as clock drift,
# unusually high metrics pipeline delay or completely wrong clocks.
#
# It is important to gather this metric right before the flush.
_unix_time_metric = metrics.GaugeMetric(
    'dev/unix_time',
    description='Number of milliseconds since epoch '
                'based on local machine clock.')

# Static descriptions of the host OS and the Python runtime.
_os_name_metric = metrics.StringMetric(
    'proc/os/name',
    description='OS name on the machine')

_os_version_metric = metrics.StringMetric(
    'proc/os/version',
    description='OS version on the machine')

_os_arch_metric = metrics.StringMetric(
    'proc/os/arch',
    description='OS architecture on this machine')

_python_arch_metric = metrics.StringMetric(
    'proc/python/arch',
    description='python userland architecture on this machine')
Allen Liec5beb32016-09-08 15:31:41 -0700102
103
def collect_uptime():
  """Report the seconds elapsed since boot, truncated to an integer."""
  elapsed = time.time() - _BOOT_TIME
  _uptime_metric.set(int(elapsed))
Allen Liec5beb32016-09-08 15:31:41 -0700106
107
def collect_cpu_info():
  """Report the CPU count and the time percentage for each CPU mode.

  The modes reported are 'user', 'system' and 'idle', each as a separate
  'mode' field value on the CPU time metric.
  """
  _cpu_count_metric.set(psutil.cpu_count())

  cpu_percent = psutil.cpu_times_percent()
  for mode in ('user', 'system', 'idle'):
    share = getattr(cpu_percent, mode)
    _cpu_time_metric.set(share, {'mode': mode})
Allen Liec5beb32016-09-08 15:31:41 -0700114
115
def collect_disk_info(mountpoints=None):
  """Report disk usage, inode and disk I/O metrics.

  Args:
    mountpoints: Iterable of mountpoint paths to report on.  When None, the
        mountpoint of every partition returned by psutil.disk_partitions()
        is used.
  """
  if mountpoints is None:
    mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
  for mountpoint in mountpoints:
    # _collect_disk_info_single() already reports the inode counts on POSIX
    # hosts.  Calling _collect_fs_inode_info() here as well would repeat the
    # statvfs() lookup for every mountpoint and would fail on platforms
    # where os.statvfs is unavailable.
    _collect_disk_info_single(mountpoint)
  _collect_disk_io_info()
Allen Liec5beb32016-09-08 15:31:41 -0700123
Allen Liec5beb32016-09-08 15:31:41 -0700124
def _collect_disk_info_single(mountpoint):
  """Report free/total bytes (and inode counts on POSIX) for one mountpoint.

  Args:
    mountpoint: Path of the mountpoint to query.

  Raises:
    OSError: If psutil.disk_usage() fails with anything other than ENOENT.
  """
  try:
    usage = psutil.disk_usage(mountpoint)
  except OSError as ex:
    if ex.errno != errno.ENOENT:
      raise
    # ENOENT happens on Windows when querying a removable drive that
    # doesn't have any media inserted right now; skip the usage metrics.
    usage = None

  if usage is not None:
    labels = {'path': mountpoint}
    _disk_free_metric.set(usage.free, fields=labels)
    _disk_total_metric.set(usage.total, fields=labels)

  # inode counts are only available on Unix.
  if os.name == 'posix':
    _collect_fs_inode_info(mountpoint)
Allen Lia6b02252016-10-26 14:40:51 -0700144
145
def _collect_fs_inode_info(mountpoint):
  """Report free and total inode counts for the filesystem at |mountpoint|.

  Args:
    mountpoint: Path handed to os.statvfs().
  """
  vfs = os.statvfs(mountpoint)
  labels = {'path': mountpoint}
  _inodes_free_metric.set(vfs.f_favail, fields=labels)
  _inodes_total_metric.set(vfs.f_files, fields=labels)
151
152
def _collect_disk_io_info():
  """Report bytes read and written for each disk known to psutil.

  Raises:
    RuntimeError: If psutil.disk_io_counters() fails for any reason other
        than not finding a physical disk.
  """
  try:
    per_disk = psutil.disk_io_counters(perdisk=True).items()
  except RuntimeError as ex:
    if "couldn't find any physical disk" not in str(ex):
      raise
    # Disk performance counters aren't enabled on Windows.
    return

  for name, io_stats in per_disk:
    labels = {'disk': name}
    _disk_read_metric.set(io_stats.read_bytes, fields=labels)
    _disk_write_metric.set(io_stats.write_bytes, fields=labels)
Allen Liec5beb32016-09-08 15:31:41 -0700167
168
def collect_mem_info():
  """Report available and total memory from psutil.virtual_memory()."""
  # mem.used is deliberately left out: because of virtual memory it is not
  # a useful number.
  vm_stats = psutil.virtual_memory()
  _mem_free_metric.set(vm_stats.available)
  _mem_total_metric.set(vm_stats.total)
Allen Liec5beb32016-09-08 15:31:41 -0700175
176
def collect_load_avg():
  """Report the 1, 5 and 15 minute load averages.

  Nothing is reported when os.getloadavg() raises OSError.
  """
  try:
    averages = os.getloadavg()
  except OSError:
    return

  # os.getloadavg() yields the averages in 1, 5, 15 minute order.
  for minutes, load in zip((1, 5, 15), averages):
    _load_average_metric.set(load, fields={'minutes': minutes})
Allen Liec5beb32016-09-08 15:31:41 -0700186
187
def collect_unix_time():
  """Report the local clock's time as whole milliseconds since the epoch."""
  now_ms = int(time.time() * 1000)
  _unix_time_metric.set(now_ms)