blob: bd4a41af92c45276ca3b9f12f5660479c5979a59 [file] [log] [blame]
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Allen Li788d1672016-12-16 15:58:23 -08005"""Send system monitoring data to the timeseries monitoring API."""
Allen Liec5beb32016-09-08 15:31:41 -07006
7from __future__ import print_function
8
Allen Li788d1672016-12-16 15:58:23 -08009import random
10import time
Allen Liec5beb32016-09-08 15:31:41 -070011
Allen Li788d1672016-12-16 15:58:23 -080012import psutil
13
14from chromite.lib import commandline
Allen Liec5beb32016-09-08 15:31:41 -070015from chromite.lib import cros_logging as logging
Allen Li788d1672016-12-16 15:58:23 -080016from chromite.lib import metrics
17from chromite.lib import ts_mon_config
Allen Li325c0762017-03-02 15:00:19 -080018from infra_libs.ts_mon.common import interface
19
Allen Libaa422d2016-12-16 18:21:10 -080020from chromite.scripts.sysmon import loop
Allen Li325c0762017-03-02 15:00:19 -080021from chromite.scripts.sysmon import net_metrics
Allen Libaa422d2016-12-16 18:21:10 -080022from chromite.scripts.sysmon import osinfo_metrics
Allen Li79317bb2016-12-16 18:25:07 -080023from chromite.scripts.sysmon import prod_metrics
Allen Li788d1672016-12-16 15:58:23 -080024from chromite.scripts.sysmon import puppet_metrics
25from chromite.scripts.sysmon import system_metrics
Allen Liec5beb32016-09-08 15:31:41 -070026
Allen Li788d1672016-12-16 15:58:23 -080027logger = logging.getLogger(__name__)
28
29
class _MetricCollector(object):
    """Callable that collects one round of metrics and flushes them.

    Each call runs the system, network and puppet collectors, then the
    rate-limited OS info collector and (optionally) the rate-limited prod
    host collector, and finally flushes everything via metrics.Flush().
    """

    def __init__(self, collect_prod_hosts=False):
        """Initialize instance.

        Args:
          collect_prod_hosts: If true, also collect prod host metrics
            (rate-limited); otherwise that step is a no-op.
        """
        # OS info is wrapped so it runs at most once per hour.
        self._collect_osinfo = _TimedCallback(
            callback=osinfo_metrics.collect_os_info,
            interval=60 * 60)
        if collect_prod_hosts:
            logger.info('Enabling prod host metric collection.')
            # Prod host info is wrapped so it runs at most once per 10 minutes.
            self._collect_prod_hosts = _TimedCallback(
                callback=prod_metrics.collect_prod_hosts,
                interval=10 * 60)
        else:
            # Keep the attribute callable so __call__ needs no branching.
            self._collect_prod_hosts = lambda: None

    def __call__(self):
        """Collect metrics."""
        system_metrics.collect_uptime()
        system_metrics.collect_cpu_info()
        system_metrics.collect_disk_info()
        system_metrics.collect_mem_info()
        net_metrics.collect_net_info()
        system_metrics.collect_proc_info()
        system_metrics.collect_load_avg()
        puppet_metrics.collect_puppet_summary()
        self._collect_prod_hosts()
        self._collect_osinfo()
        system_metrics.collect_unix_time()  # must be just before flush
        metrics.Flush()

    # NOTE: the former `_next_osinfo_collection` property was removed.  It
    # read `self._last_osinfo_collection`, which is never assigned, so any
    # access raised AttributeError; the throttling is handled by
    # _TimedCallback instead.
Allen Li788d1672016-12-16 15:58:23 -080063
64
class _TimedCallback(object):
    """Limits callback to one call in a given interval.

    Calling the instance invokes the wrapped callback only if at least
    `interval` seconds have passed since the previous invocation finished;
    otherwise the call does nothing.
    """

    def __init__(self, callback, interval):
        """Initialize instance.

        Args:
          callback: function to call
          interval: Number of seconds between allowed calls
        """
        self._callback = callback
        self._interval = interval
        # Backdate the last call by one interval so the first call is due
        # immediately.
        self._last_called = time.time() - interval

    def __call__(self):
        """Invoke the callback if the interval has elapsed; else do nothing."""
        # Inclusive comparison: a call landing exactly on the deadline is
        # allowed.  With a strict `>` the first call (or any call when
        # interval is 0) was skipped whenever the clock reading had not
        # advanced since the timestamp was recorded.
        if time.time() >= self._next_call:
            self._callback()
            # Measure the interval from when the callback finished.
            self._last_called = time.time()

    @property
    def _next_call(self):
        """Earliest timestamp at which the callback may run again."""
        return self._last_called + self._interval
87
88
def main():
    """Parse options, set up ts_mon, and run the metric collection loop."""
    arg_parser = commandline.ArgumentParser(
        description=__doc__, default_log_level='DEBUG')
    arg_parser.add_argument(
        '--interval',
        type=int,
        default=60,
        help='time (in seconds) between sampling system metrics')
    arg_parser.add_argument(
        '--collect-prod-hosts',
        action='store_true',
        help='Enable collection of prod host metrics, like roles')
    options = arg_parser.parse_args()
    options.Freeze()

    # The first call to this returns a 0 value, so make that call here and
    # throw the result away.
    psutil.cpu_times_percent()

    # Stagger startup by a random delay of up to one interval, in case
    # sysmon is launched at exactly the same moment on all machines.
    startup_delay = random.uniform(0, options.interval)
    time.sleep(startup_delay)

    # The context manager returned here doesn't do anything, so it is
    # deliberately not kept.
    ts_mon_config.SetupTsMonGlobalState('sysmon', auto_flush=False)
    # The default prefix is '/chrome/infra/'.
    base_prefix = interface.state.metric_name_prefix
    interface.state.metric_name_prefix = base_prefix + 'chromeos/sysmon/'

    metric_collector = _MetricCollector(
        collect_prod_hosts=options.collect_prod_hosts)
    sleep_loop = loop.SleepLoop(callback=metric_collector,
                                interval=options.interval)
    sleep_loop.loop_forever()


if __name__ == '__main__':
    main()