blob: a4fb04e720b628dfd60f26bd17d1cbf1b5800c12 [file] [log] [blame]
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Allen Li788d1672016-12-16 15:58:23 -08005"""Send system monitoring data to the timeseries monitoring API."""
Allen Liec5beb32016-09-08 15:31:41 -07006
7from __future__ import print_function
8
Allen Li788d1672016-12-16 15:58:23 -08009import random
10import time
Allen Liec5beb32016-09-08 15:31:41 -070011
Allen Li788d1672016-12-16 15:58:23 -080012import psutil
13
14from chromite.lib import commandline
Allen Liec5beb32016-09-08 15:31:41 -070015from chromite.lib import cros_logging as logging
Allen Li788d1672016-12-16 15:58:23 -080016from chromite.lib import metrics
17from chromite.lib import ts_mon_config
Allen Libaa422d2016-12-16 18:21:10 -080018from chromite.scripts.sysmon import loop
19from chromite.scripts.sysmon import osinfo_metrics
Allen Li79317bb2016-12-16 18:25:07 -080020from chromite.scripts.sysmon import prod_metrics
Allen Li788d1672016-12-16 15:58:23 -080021from chromite.scripts.sysmon import puppet_metrics
22from chromite.scripts.sysmon import system_metrics
23from infra_libs.ts_mon.common import interface
Allen Liec5beb32016-09-08 15:31:41 -070024
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
26
27
class _MetricCollector(object):
  """Callable that gathers one round of system metrics and flushes them.

  Each invocation calls the system and puppet metric collectors, the
  rate-limited OS info collector and (when enabled) the rate-limited prod
  host collector, and finally flushes the metrics.
  """

  def __init__(self, collect_prod_hosts=False):
    """Initialize instance.

    Args:
      collect_prod_hosts: When true, prod host metrics are collected (at
          most once per 10 minutes).  When false, the prod host step is a
          no-op.
    """
    # OS info is collected at most once per hour.
    self._get_osinfo = _TimedCallback(
        callback=osinfo_metrics.get_os_info,
        interval=60 * 60)
    if collect_prod_hosts:
      logger.info('Enabling prod host metric collection.')
      self._get_prod_hosts = _TimedCallback(
          callback=prod_metrics.get_prod_hosts,
          interval=10 * 60)
    else:
      # Keep __call__ unconditional by substituting a do-nothing callable.
      self._get_prod_hosts = lambda: None

  def __call__(self):
    """Collect metrics."""
    system_metrics.get_uptime()
    system_metrics.get_cpu_info()
    system_metrics.get_disk_info()
    system_metrics.get_mem_info()
    system_metrics.get_net_info()
    system_metrics.get_proc_info()
    system_metrics.get_load_avg()
    puppet_metrics.get_puppet_summary()
    self._get_prod_hosts()
    self._get_osinfo()
    system_metrics.get_unix_time()  # must be just before flush
    metrics.Flush()

  # NOTE: the former `_next_osinfo_collection` property was removed.  It
  # read `self._last_osinfo_collection`, which is never assigned, so any
  # access raised AttributeError; the hourly throttling is handled by the
  # _TimedCallback stored in `self._get_osinfo`.
Allen Li788d1672016-12-16 15:58:23 -080061
62
Allen Libaa422d2016-12-16 18:21:10 -080063class _TimedCallback(object):
64 """Limits callback to one call in a given interval."""
65
66 def __init__(self, callback, interval):
67 """Initialize instance.
68
69 Args:
70 callback: function to call
71 interval: Number of seconds between allowed calls
72 """
73 self._callback = callback
74 self._interval = interval
75 self._last_called = time.time() - interval
76
77 def __call__(self):
78 if time.time() > self._next_call:
79 self._callback()
80 self._last_called = time.time()
81
82 @property
83 def _next_call(self):
84 return self._last_called + self._interval
85
86
def main():
  """Parse options, set up ts_mon and run the metric collection loop.

  Command-line options:
    --interval: Seconds between metric samples (int, default 60).
    --collect-prod-hosts: Flag enabling collection of prod host metrics.
  """
  arg_parser = commandline.ArgumentParser(
      description=__doc__,
      default_log_level='DEBUG')
  arg_parser.add_argument(
      '--interval',
      type=int,
      default=60,
      help='time (in seconds) between sampling system metrics')
  arg_parser.add_argument(
      '--collect-prod-hosts',
      action='store_true',
      help='Enable collection of prod host metrics, like roles')
  options = arg_parser.parse_args()
  options.Freeze()
  sampling_interval = options.interval

  # The first call to this function yields a 0 value, so make that call
  # here and throw the result away.
  psutil.cpu_times_percent()

  # Sleep for a random fraction of the interval before looping, so that
  # machines which all start sysmon at the same instant do not sample in
  # lockstep.
  startup_delay = random.uniform(0, sampling_interval)
  time.sleep(startup_delay)

  # The context manager returned by this call does nothing, so the return
  # value is deliberately dropped.
  ts_mon_config.SetupTsMonGlobalState('sysmon', auto_flush=False)
  # The default prefix is '/chrome/infra/'.
  default_prefix = interface.state.metric_name_prefix
  interface.state.metric_name_prefix = default_prefix + 'chromeos/sysmon/'

  metric_collector = _MetricCollector(
      collect_prod_hosts=options.collect_prod_hosts)
  sleep_loop = loop.SleepLoop(callback=metric_collector,
                              interval=sampling_interval)
  sleep_loop.loop_forever()
Allen Li788d1672016-12-16 15:58:23 -0800121
122
# Start the collection loop only when this file is executed as a script.
if __name__ == '__main__':
  main()