blob: c199f1f55d936059e1254d5b36ed6bd6d6c59911 [file] [log] [blame]
Allen Liec5beb32016-09-08 15:31:41 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Allen Li788d1672016-12-16 15:58:23 -08005"""Send system monitoring data to the timeseries monitoring API."""
Allen Liec5beb32016-09-08 15:31:41 -07006
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008from __future__ import print_function
Allen Li13bdf0c2017-03-02 15:18:16 -08009from __future__ import unicode_literals
Allen Liec5beb32016-09-08 15:31:41 -070010
Allen Li788d1672016-12-16 15:58:23 -080011import random
12import time
Allen Liec5beb32016-09-08 15:31:41 -070013
Allen Li788d1672016-12-16 15:58:23 -080014import psutil
15
16from chromite.lib import commandline
Allen Liec5beb32016-09-08 15:31:41 -070017from chromite.lib import cros_logging as logging
Allen Li788d1672016-12-16 15:58:23 -080018from chromite.lib import metrics
19from chromite.lib import ts_mon_config
Allen Li325c0762017-03-02 15:00:19 -080020from infra_libs.ts_mon.common import interface
21
Allen Libaa422d2016-12-16 18:21:10 -080022from chromite.scripts.sysmon import loop
Allen Li325c0762017-03-02 15:00:19 -080023from chromite.scripts.sysmon import net_metrics
Allen Libaa422d2016-12-16 18:21:10 -080024from chromite.scripts.sysmon import osinfo_metrics
Allen Li79317bb2016-12-16 18:25:07 -080025from chromite.scripts.sysmon import prod_metrics
Allen Li788d1672016-12-16 15:58:23 -080026from chromite.scripts.sysmon import puppet_metrics
27from chromite.scripts.sysmon import system_metrics
Allen Liec5beb32016-09-08 15:31:41 -070028
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
30
31
Allen Libaa422d2016-12-16 18:21:10 -080032class _MetricCollector(object):
Allen Li38de6412016-12-16 16:52:45 -080033 """Metric collector class."""
34
Allen Li79317bb2016-12-16 18:25:07 -080035 def __init__(self, collect_prod_hosts=False):
Allen Li45ae8392017-03-02 14:19:35 -080036 self._collect_osinfo = _TimedCallback(
37 callback=osinfo_metrics.collect_os_info,
Allen Libaa422d2016-12-16 18:21:10 -080038 interval=60 * 60)
Allen Li79317bb2016-12-16 18:25:07 -080039 if collect_prod_hosts:
40 logger.info('Enabling prod host metric collection.')
Allen Li45ae8392017-03-02 14:19:35 -080041 self._collect_prod_hosts = _TimedCallback(
42 callback=prod_metrics.collect_prod_hosts,
Allen Li79317bb2016-12-16 18:25:07 -080043 interval=10 * 60)
44 else:
Allen Li45ae8392017-03-02 14:19:35 -080045 self._collect_prod_hosts = lambda: None
Allen Li38de6412016-12-16 16:52:45 -080046
47 def __call__(self):
48 """Collect metrics."""
Allen Li45ae8392017-03-02 14:19:35 -080049 system_metrics.collect_uptime()
50 system_metrics.collect_cpu_info()
51 system_metrics.collect_disk_info()
52 system_metrics.collect_mem_info()
Allen Li325c0762017-03-02 15:00:19 -080053 net_metrics.collect_net_info()
Allen Li45ae8392017-03-02 14:19:35 -080054 system_metrics.collect_proc_info()
55 system_metrics.collect_load_avg()
56 puppet_metrics.collect_puppet_summary()
57 self._collect_prod_hosts()
58 self._collect_osinfo()
59 system_metrics.collect_unix_time() # must be just before flush
Allen Li38de6412016-12-16 16:52:45 -080060 metrics.Flush()
61
62 @property
63 def _next_osinfo_collection(self):
64 return self._last_osinfo_collection + (60 * 60)
Allen Li788d1672016-12-16 15:58:23 -080065
66
Allen Libaa422d2016-12-16 18:21:10 -080067class _TimedCallback(object):
68 """Limits callback to one call in a given interval."""
69
70 def __init__(self, callback, interval):
71 """Initialize instance.
72
73 Args:
74 callback: function to call
75 interval: Number of seconds between allowed calls
76 """
77 self._callback = callback
78 self._interval = interval
79 self._last_called = time.time() - interval
80
81 def __call__(self):
82 if time.time() > self._next_call:
83 self._callback()
84 self._last_called = time.time()
85
86 @property
87 def _next_call(self):
88 return self._last_called + self._interval
89
90
def main():
  """Parse options, set up ts_mon, and run the metric loop forever."""
  arg_parser = commandline.ArgumentParser(
      default_log_level='DEBUG',
      description=__doc__)
  arg_parser.add_argument(
      '--interval',
      type=int,
      default=60,
      help='time (in seconds) between sampling system metrics')
  arg_parser.add_argument(
      '--collect-prod-hosts',
      action='store_true',
      help='Enable collection of prod host metrics, like roles')
  opts = arg_parser.parse_args()
  opts.Freeze()

  # The first call to this returns a 0 value, so prime it here and throw
  # the result away.
  psutil.cpu_times_percent()

  # Stagger startup by a random delay so that machines which launch
  # sysmon at the same instant do not all report in lockstep.
  startup_delay = random.uniform(0, opts.interval)
  time.sleep(startup_delay)

  # The returned context manager doesn't do anything, so the return value
  # is deliberately dropped.
  ts_mon_config.SetupTsMonGlobalState('sysmon', auto_flush=False)
  # The default prefix is '/chrome/infra/'.
  interface.state.metric_name_prefix += 'chromeos/sysmon/'

  collector = _MetricCollector(collect_prod_hosts=opts.collect_prod_hosts)
  sleep_loop = loop.SleepLoop(callback=collector, interval=opts.interval)
  sleep_loop.loop_forever()
Allen Li788d1672016-12-16 15:58:23 -0800125
126
# Run the monitor only when this file is executed as a script.
if __name__ == '__main__':
  main()