blob: 06b377c4793be6f69925417198dc766b60bcb65a [file] [log] [blame]
Allen Liec5beb32016-09-08 15:31:41 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Allen Liec5beb32016-09-08 15:31:41 -07005"""System metrics."""
6
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008from __future__ import print_function
9
10import errno
11import os
Allen Liec5beb32016-09-08 15:31:41 -070012import time
13
14import psutil
15
16from chromite.lib import cros_logging as logging
Allen Lia9c6e802017-07-11 15:42:47 -070017from chromite.lib import metrics
Allen Liec5beb32016-09-08 15:31:41 -070018
logger = logging.getLogger(__name__)


# --- CPU ---------------------------------------------------------------------
_cpu_count_metric = metrics.GaugeMetric(
    'dev/cpu/count',
    description='Number of CPU cores.')
# Reported once per CPU state; the state name is attached as the 'mode' field.
_cpu_time_metric = metrics.FloatMetric(
    'dev/cpu/time',
    description='percentage of time spent by the CPU '
                'in different states.')

# --- Disk space (one stream per mount point, keyed by the 'path' field) ------
_disk_free_metric = metrics.GaugeMetric(
    'dev/disk/free',
    description='Available bytes on disk partition.',
    units=metrics.MetricsDataUnits.BYTES)
_disk_total_metric = metrics.GaugeMetric(
    'dev/disk/total',
    description='Total bytes on disk partition.',
    units=metrics.MetricsDataUnits.BYTES)

# --- Inodes (one stream per mount point, keyed by the 'path' field) ----------
_inodes_free_metric = metrics.GaugeMetric(
    'dev/inodes/free',
    description='Number of available inodes on '
                'disk partition (unix only).')
_inodes_total_metric = metrics.GaugeMetric(
    'dev/inodes/total',
    description='Number of possible inodes on '
                'disk partition (unix only)')

# --- Memory ------------------------------------------------------------------
_mem_free_metric = metrics.GaugeMetric(
    'dev/mem/free',
    description='Amount of memory available to a '
                'process (in Bytes). Buffers are considered '
                'free memory.',
    units=metrics.MetricsDataUnits.BYTES)

_mem_total_metric = metrics.GaugeMetric(
    'dev/mem/total',
    description='Total physical memory in Bytes.',
    units=metrics.MetricsDataUnits.BYTES)
Allen Liec5beb32016-09-08 15:31:41 -070059
# Boot timestamp as reported by psutil, read once when this module is
# imported. It is the start time of the cumulative disk I/O counters below and
# the reference point for the uptime gauge.
_BOOT_TIME = psutil.boot_time()

# --- Disk I/O (one stream per disk, keyed by the 'disk' field) ---------------
_disk_read_metric = metrics.CounterMetric(
    'dev/disk/read', start_time=_BOOT_TIME,
    description='Number of Bytes read on disk.',
    units=metrics.MetricsDataUnits.BYTES)
_disk_write_metric = metrics.CounterMetric(
    'dev/disk/write', start_time=_BOOT_TIME,
    description='Number of Bytes written on disk.',
    units=metrics.MetricsDataUnits.BYTES)

# --- Uptime ------------------------------------------------------------------
_uptime_metric = metrics.GaugeMetric(
    'dev/uptime',
    description='Machine uptime, in seconds.',
    units=metrics.MetricsDataUnits.SECONDS)

# --- Load average (one stream per window, keyed by the 'minutes' field) ------
_load_average_metric = metrics.FloatMetric(
    'dev/proc/load_average',
    description='Number of processes currently '
                'in the system run queue.')
Allen Liec5beb32016-09-08 15:31:41 -070080
# The ts_mon pipeline uses backend clocks when assigning timestamps to metric
# points. By comparing a point's timestamp to its value (i.e. the time by the
# machine's local clock), we can potentially detect some anomalies (clock
# drift, unusually high metrics pipeline delay, completely wrong clocks, etc).
#
# It is important to gather this metric right before the flush.
_unix_time_metric = metrics.GaugeMetric(
    'dev/unix_time',
    description='Number of milliseconds since epoch'
                ' based on local machine clock.')

# --- Static platform descriptors ---------------------------------------------
_os_name_metric = metrics.StringMetric(
    'proc/os/name',
    description='OS name on the machine')

_os_version_metric = metrics.StringMetric(
    'proc/os/version',
    description='OS version on the machine')

_os_arch_metric = metrics.StringMetric(
    'proc/os/arch',
    description='OS architecture on this machine')

_python_arch_metric = metrics.StringMetric(
    'proc/python/arch',
    description='python userland '
                'architecture on this machine')
Allen Liec5beb32016-09-08 15:31:41 -0700108
109
def collect_uptime():
  """Report the seconds elapsed since boot, truncated to an integer.

  The elapsed time is the current wall-clock time minus the boot timestamp
  captured in _BOOT_TIME when this module was imported.
  """
  seconds_since_boot = time.time() - _BOOT_TIME
  _uptime_metric.set(int(seconds_since_boot))
Allen Liec5beb32016-09-08 15:31:41 -0700112
113
def collect_cpu_info():
  """Report the CPU count and the user/system/idle CPU time percentages.

  Each percentage is written to the same metric, distinguished by a 'mode'
  field naming the CPU state.
  """
  core_count = psutil.cpu_count()
  _cpu_count_metric.set(core_count)

  percentages = psutil.cpu_times_percent()
  for mode in ('user', 'system', 'idle'):
    share = getattr(percentages, mode)
    _cpu_time_metric.set(share, {'mode': mode})
Allen Liec5beb32016-09-08 15:31:41 -0700120
121
def collect_disk_info(mountpoints=None):
  """Report disk usage for each mount point, then per-disk I/O counters.

  Args:
    mountpoints: Iterable of mount point paths to report on. When None, the
      mount point of every partition listed by psutil.disk_partitions() is
      used.
  """
  if mountpoints is None:
    mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
  for mountpoint in mountpoints:
    # _collect_disk_info_single() reports the inode counts itself, guarded by
    # an os.name == 'posix' check. Calling _collect_fs_inode_info() here as
    # well would report them twice on Unix and hit os.statvfs() on platforms
    # that do not provide it.
    _collect_disk_info_single(mountpoint)
  _collect_disk_io_info()
Allen Liec5beb32016-09-08 15:31:41 -0700129
Allen Liec5beb32016-09-08 15:31:41 -0700130
def _collect_disk_info_single(mountpoint):
  """Report free/total bytes (and, on posix, inode counts) for a mount point.

  Args:
    mountpoint: Path of the mount point to query.

  Raises:
    OSError: If psutil.disk_usage() fails with anything other than ENOENT.
  """
  try:
    usage = psutil.disk_usage(mountpoint)
  except OSError as ex:
    if ex.errno != errno.ENOENT:
      raise
    # This happens on Windows when querying a removable drive that
    # doesn't have any media inserted right now. Nothing to report.
    return

  path_fields = {'path': mountpoint}
  _disk_free_metric.set(usage.free, fields=path_fields)
  _disk_total_metric.set(usage.total, fields=path_fields)

  # inode counts are only available on Unix.
  if os.name == 'posix':
    _collect_fs_inode_info(mountpoint)
Allen Lia6b02252016-10-26 14:40:51 -0700150
151
def _collect_fs_inode_info(mountpoint):
  """Report the free and total inode counts of a mount point.

  Values come from os.statvfs(): f_favail for the free count and f_files for
  the total.

  Args:
    mountpoint: Path of the mount point to query.
  """
  vfs = os.statvfs(mountpoint)
  path_fields = {'path': mountpoint}
  _inodes_free_metric.set(vfs.f_favail, fields=path_fields)
  _inodes_total_metric.set(vfs.f_files, fields=path_fields)
157
158
def _collect_disk_io_info():
  """Report cumulative bytes read and written for every disk.

  Each disk is reported as its own stream, identified by a 'disk' field
  holding the name psutil uses for it.

  Raises:
    RuntimeError: If psutil.disk_io_counters() fails for any reason other
      than not finding a physical disk.
  """
  try:
    per_disk = psutil.disk_io_counters(perdisk=True)
  except RuntimeError as ex:
    if "couldn't find any physical disk" not in str(ex):
      raise
    # Disk performance counters aren't enabled on Windows.
    return

  # items() is available on both Python 2 and Python 3, whereas iteritems()
  # only exists on Python 2 dicts.
  for disk, counters in per_disk.items():
    fields = {'disk': disk}
    _disk_read_metric.set(counters.read_bytes, fields=fields)
    _disk_write_metric.set(counters.write_bytes, fields=fields)
Allen Liec5beb32016-09-08 15:31:41 -0700173
174
def collect_mem_info():
  """Report available and total memory from psutil.virtual_memory()."""
  # We don't report mem.used because (due to virtual memory) it is not
  # useful.
  snapshot = psutil.virtual_memory()
  available_bytes = snapshot.available
  total_bytes = snapshot.total
  _mem_free_metric.set(available_bytes)
  _mem_total_metric.set(total_bytes)
Allen Liec5beb32016-09-08 15:31:41 -0700181
182
def collect_load_avg():
  """Report the 1, 5 and 15 minute load averages.

  Each average is written to the same metric with a 'minutes' field naming
  its window. If os.getloadavg() raises OSError, nothing is reported.
  """
  try:
    averages = os.getloadavg()
  except OSError:
    return

  # os.getloadavg() yields the averages in 1, 5, 15 minute order.
  for minutes, average in zip((1, 5, 15), averages):
    _load_average_metric.set(average, fields={'minutes': minutes})
Allen Liec5beb32016-09-08 15:31:41 -0700192
193
def collect_unix_time():
  """Report the local clock as integer milliseconds since the epoch."""
  now_ms = time.time() * 1000
  _unix_time_metric.set(int(now_ms))