# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
| 4 | |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 5 | """System metrics.""" |
| 6 | |
Allen Li | 13bdf0c | 2017-03-02 15:18:16 -0800 | [diff] [blame] | 7 | from __future__ import absolute_import |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 8 | from __future__ import print_function |
| 9 | |
| 10 | import errno |
| 11 | import os |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 12 | import time |
| 13 | |
| 14 | import psutil |
| 15 | |
| 16 | from chromite.lib import cros_logging as logging |
Allen Li | a9c6e80 | 2017-07-11 15:42:47 -0700 | [diff] [blame^] | 17 | from chromite.lib import metrics |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 18 | |
Allen Li | 79317bb | 2016-12-16 18:25:07 -0800 | [diff] [blame] | 19 | logger = logging.getLogger(__name__) |
| 20 | |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 21 | |
# CPU metrics: a core-count gauge and a float metric that is reported once
# per CPU mode.
_cpu_count_metric = metrics.GaugeMetric(
    'dev/cpu/count', description='Number of CPU cores.')
_cpu_time_metric = metrics.FloatMetric(
    'dev/cpu/time',
    description='percentage of time spent by the CPU in different states.')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 29 | |
# Disk capacity gauges; both are declared with byte units.
_disk_free_metric = metrics.GaugeMetric(
    'dev/disk/free',
    units=metrics.MetricsDataUnits.BYTES,
    description='Available bytes on disk partition.')
_disk_total_metric = metrics.GaugeMetric(
    'dev/disk/total',
    units=metrics.MetricsDataUnits.BYTES,
    description='Total bytes on disk partition.')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 38 | |
# Inode gauges, filled in from os.statvfs() results.
_inodes_free_metric = metrics.GaugeMetric(
    'dev/inodes/free',
    description='Number of available inodes on disk partition (unix only).')
_inodes_total_metric = metrics.GaugeMetric(
    'dev/inodes/total',
    description='Number of possible inodes on disk partition (unix only)')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 47 | |
# Memory gauges; both are declared with byte units.
_mem_free_metric = metrics.GaugeMetric(
    'dev/mem/free',
    units=metrics.MetricsDataUnits.BYTES,
    description=('Amount of memory available to a process (in Bytes). '
                 'Buffers are considered free memory.'))

_mem_total_metric = metrics.GaugeMetric(
    'dev/mem/total',
    units=metrics.MetricsDataUnits.BYTES,
    description='Total physical memory in Bytes.')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 59 | |
# Boot time is read once at import; it is the start time of the disk I/O
# counters below and the reference point for the uptime gauge.
_BOOT_TIME = psutil.boot_time()

_disk_read_metric = metrics.CounterMetric(
    'dev/disk/read',
    description='Number of Bytes read on disk.',
    units=metrics.MetricsDataUnits.BYTES,
    start_time=_BOOT_TIME)
_disk_write_metric = metrics.CounterMetric(
    'dev/disk/write',
    description='Number of Bytes written on disk.',
    units=metrics.MetricsDataUnits.BYTES,
    start_time=_BOOT_TIME)
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 70 | |
# Uptime gauge, declared with second units.
_uptime_metric = metrics.GaugeMetric(
    'dev/uptime',
    units=metrics.MetricsDataUnits.SECONDS,
    description='Machine uptime, in seconds.')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 75 | |
# Load average, reported once per averaging window (1, 5 and 15 minutes).
_load_average_metric = metrics.FloatMetric(
    'dev/proc/load_average',
    description='Number of processes currently in the system run queue.')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 80 | |
# The ts_mon pipeline uses backend clocks when assigning timestamps to metric
# points. Comparing a point's timestamp with its value (i.e. the time by the
# machine's local clock) can potentially reveal anomalies such as clock
# drift, unusually high metrics pipeline delay, or completely wrong clocks.
#
# It is important to gather this metric right before the flush.
_unix_time_metric = metrics.GaugeMetric(
    'dev/unix_time',
    description=('Number of milliseconds since epoch based on '
                 'local machine clock.'))
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 91 | |
# String metrics describing the operating system and the Python runtime.
_os_name_metric = metrics.StringMetric(
    'proc/os/name', description='OS name on the machine')

_os_version_metric = metrics.StringMetric(
    'proc/os/version', description='OS version on the machine')

_os_arch_metric = metrics.StringMetric(
    'proc/os/arch', description='OS architecture on this machine')

_python_arch_metric = metrics.StringMetric(
    'proc/python/arch',
    description='python userland architecture on this machine')
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 108 | |
| 109 | |
def collect_uptime():
    """Report the whole seconds elapsed since boot to the uptime gauge."""
    elapsed = time.time() - _BOOT_TIME
    _uptime_metric.set(int(elapsed))
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 112 | |
| 113 | |
def collect_cpu_info():
    """Report the CPU count and the user/system/idle time percentages."""
    _cpu_count_metric.set(psutil.cpu_count())

    cpu_percentages = psutil.cpu_times_percent()
    for mode in ('user', 'system', 'idle'):
        value = getattr(cpu_percentages, mode)
        # One data point per mode, distinguished by the 'mode' field.
        _cpu_time_metric.set(value, {'mode': mode})
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 120 | |
| 121 | |
def collect_disk_info(mountpoints=None):
    """Collect disk usage, inode, and disk I/O metrics.

    Args:
      mountpoints: Iterable of mount point paths to report on.  When None,
        the mount point of every partition returned by
        psutil.disk_partitions() is used.
    """
    if mountpoints is None:
        mountpoints = [disk.mountpoint for disk in psutil.disk_partitions()]
    for mountpoint in mountpoints:
        # _collect_disk_info_single() already reports inode counts when
        # os.name is 'posix'.  Calling _collect_fs_inode_info() again here
        # would repeat the statvfs() work and, being unguarded, would fail
        # on platforms that do not provide os.statvfs.
        _collect_disk_info_single(mountpoint)
    # I/O counters are collected per disk, not per mount point, so this is
    # done once after the loop.
    _collect_disk_io_info()
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 129 | |
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 130 | |
def _collect_disk_info_single(mountpoint):
    """Report free/total bytes and, on POSIX, inode counts for one mount.

    Args:
      mountpoint: Path of the mounted filesystem to inspect.

    Raises:
      OSError: psutil.disk_usage() failed with an errno other than ENOENT.
    """
    labels = {'path': mountpoint}

    try:
        disk_usage = psutil.disk_usage(mountpoint)
    except OSError as ex:
        # ENOENT happens on Windows when querying a removable drive that
        # doesn't have any media inserted right now; anything else is
        # unexpected and propagates.
        if ex.errno != errno.ENOENT:
            raise
    else:
        _disk_free_metric.set(disk_usage.free, fields=labels)
        _disk_total_metric.set(disk_usage.total, fields=labels)

    # inode counts are only available on Unix.
    if os.name == 'posix':
        _collect_fs_inode_info(mountpoint)
Allen Li | a6b0225 | 2016-10-26 14:40:51 -0700 | [diff] [blame] | 150 | |
| 151 | |
def _collect_fs_inode_info(mountpoint):
    """Report the available and total inode counts for one mount point.

    Args:
      mountpoint: Path of the mounted filesystem passed to os.statvfs().
    """
    vfs = os.statvfs(mountpoint)
    labels = {'path': mountpoint}
    _inodes_free_metric.set(vfs.f_favail, fields=labels)
    _inodes_total_metric.set(vfs.f_files, fields=labels)
| 157 | |
| 158 | |
def _collect_disk_io_info():
    """Report the bytes read and written for each disk known to psutil.

    Raises:
      RuntimeError: psutil.disk_io_counters() failed with a message other
        than the "couldn't find any physical disk" one handled below.
    """
    try:
        # items() instead of the Python 2-only iteritems(), so this works
        # on both Python 2 and Python 3.
        disk_counters = psutil.disk_io_counters(perdisk=True).items()
    except RuntimeError as ex:
        if "couldn't find any physical disk" not in str(ex):
            raise
        # Disk performance counters aren't enabled on Windows.
        return

    for disk, counters in disk_counters:
        fields = {'disk': disk}
        _disk_read_metric.set(counters.read_bytes, fields=fields)
        _disk_write_metric.set(counters.write_bytes, fields=fields)
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 173 | |
| 174 | |
def collect_mem_info():
    """Report available and total memory from psutil.virtual_memory()."""
    # mem.used is deliberately not reported: due to virtual memory it is
    # not useful.
    vm_stats = psutil.virtual_memory()
    available, total = vm_stats.available, vm_stats.total
    _mem_free_metric.set(available)
    _mem_total_metric.set(total)
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 181 | |
| 182 | |
def collect_load_avg():
    """Report the 1, 5 and 15 minute load averages.

    Nothing is reported when os.getloadavg() raises OSError.
    """
    try:
        one_min, five_min, fifteen_min = os.getloadavg()
    except OSError:
        return

    windows = ((1, one_min), (5, five_min), (15, fifteen_min))
    for minutes, average in windows:
        _load_average_metric.set(average, fields={'minutes': minutes})
Allen Li | ec5beb3 | 2016-09-08 15:31:41 -0700 | [diff] [blame] | 192 | |
| 193 | |
def collect_unix_time():
    """Report the local clock's time since the epoch, in milliseconds."""
    now_ms = int(time.time() * 1000)
    _unix_time_metric.set(now_ms)