blob: 86c4d345c01793fb1ba88a01d383992c9616271f [file] [log] [blame]
Allen Liec5beb32016-09-08 15:31:41 -07001# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Allen Li788d1672016-12-16 15:58:23 -08005"""Send system monitoring data to the timeseries monitoring API."""
Allen Liec5beb32016-09-08 15:31:41 -07006
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008from __future__ import print_function
Allen Li13bdf0c2017-03-02 15:18:16 -08009from __future__ import unicode_literals
Allen Liec5beb32016-09-08 15:31:41 -070010
Allen Li788d1672016-12-16 15:58:23 -080011import random
12import time
Allen Liec5beb32016-09-08 15:31:41 -070013
Allen Li788d1672016-12-16 15:58:23 -080014import psutil
15
16from chromite.lib import commandline
Allen Liec5beb32016-09-08 15:31:41 -070017from chromite.lib import cros_logging as logging
Allen Li788d1672016-12-16 15:58:23 -080018from chromite.lib import metrics
19from chromite.lib import ts_mon_config
Allen Li325c0762017-03-02 15:00:19 -080020from infra_libs.ts_mon.common import interface
21
Allen Li24bf8182017-03-02 16:41:20 -080022from chromite.scripts.sysmon import git_metrics
Allen Libaa422d2016-12-16 18:21:10 -080023from chromite.scripts.sysmon import loop
Allen Li325c0762017-03-02 15:00:19 -080024from chromite.scripts.sysmon import net_metrics
Allen Libaa422d2016-12-16 18:21:10 -080025from chromite.scripts.sysmon import osinfo_metrics
Allen Li79317bb2016-12-16 18:25:07 -080026from chromite.scripts.sysmon import prod_metrics
Allen Li788d1672016-12-16 15:58:23 -080027from chromite.scripts.sysmon import puppet_metrics
28from chromite.scripts.sysmon import system_metrics
Allen Liec5beb32016-09-08 15:31:41 -070029
Allen Li788d1672016-12-16 15:58:23 -080030logger = logging.getLogger(__name__)
31
32
class _MetricCollector(object):
    """Callable that collects one round of metrics and flushes them.

    Most collectors run on every call.  OS info and, when enabled, prod
    host collection are wrapped in _TimedCallback so they run at most once
    per their own interval (60 minutes and 10 minutes respectively), no
    matter how often the collector itself is called.
    """

    def __init__(self, collect_prod_hosts=False):
        """Initialize instance.

        Args:
          collect_prod_hosts: If true, also collect prod host metrics.
        """
        self._collect_osinfo = _TimedCallback(
            callback=osinfo_metrics.collect_os_info,
            interval=60 * 60)
        if collect_prod_hosts:
            logger.info('Enabling prod host metric collection.')
            self._collect_prod_hosts = _TimedCallback(
                callback=prod_metrics.collect_prod_hosts,
                interval=10 * 60)
        else:
            # No-op stand-in so __call__ can invoke it unconditionally.
            self._collect_prod_hosts = lambda: None

    def __call__(self):
        """Collect metrics."""
        system_metrics.collect_uptime()
        system_metrics.collect_cpu_info()
        system_metrics.collect_disk_info()
        system_metrics.collect_mem_info()
        net_metrics.collect_net_info()
        system_metrics.collect_proc_info()
        system_metrics.collect_load_avg()
        puppet_metrics.collect_puppet_summary()
        git_metrics.collect_git_metrics()
        # These two throttle themselves; see __init__.
        self._collect_prod_hosts()
        self._collect_osinfo()
        system_metrics.collect_unix_time()  # must be just before flush
        metrics.Flush()
Allen Li788d1672016-12-16 15:58:23 -080067
68
class _TimedCallback(object):
    """Limits callback to one call in a given interval."""

    def __init__(self, callback, interval):
        """Initialize instance.

        Args:
          callback: function to call
          interval: Number of seconds between allowed calls
        """
        self._callback = callback
        self._interval = interval
        # Backdate the last call by one interval so that the very first
        # invocation is already eligible to run.
        self._last_called = time.time() - interval

    def __call__(self):
        """Run the callback if at least one interval has passed.

        Calls made before the interval has elapsed are silently ignored.
        """
        # Inclusive comparison: a call made exactly when the interval has
        # elapsed is allowed.  With a strict '>' the first call could be
        # dropped if the clock had not advanced since construction.
        if time.time() >= self._next_call:
            self._callback()
            # Stamp after the callback returns, so the interval is measured
            # from the completion of the previous run.
            self._last_called = time.time()

    @property
    def _next_call(self):
        """Earliest timestamp at which the callback may run again."""
        return self._last_called + self._interval
91
92
def main():
    """Parse options, set up ts_mon and run the collection loop forever."""
    arg_parser = commandline.ArgumentParser(
        description=__doc__,
        default_log_level='DEBUG')
    arg_parser.add_argument(
        '--interval',
        type=int,
        default=60,
        help='time (in seconds) between sampling system metrics')
    arg_parser.add_argument(
        '--collect-prod-hosts',
        action='store_true',
        help='Enable collection of prod host metrics, like roles')
    options = arg_parser.parse_args()
    options.Freeze()

    # The first call to this returns a 0 value, so make that call here
    # and throw the result away.
    psutil.cpu_times_percent()

    # Stagger start-up by a random delay, in case sysmon is launched at
    # exactly the same moment on every machine.
    startup_delay = random.uniform(0, options.interval)
    time.sleep(startup_delay)

    # The returned context manager doesn't do anything, so it is not kept.
    ts_mon_config.SetupTsMonGlobalState('sysmon', auto_flush=False)
    # Extend the default prefix, which is '/chrome/infra/'.
    interface.state.metric_name_prefix += 'chromeos/sysmon/'

    collector = _MetricCollector(
        collect_prod_hosts=options.collect_prod_hosts)
    sleep_loop = loop.SleepLoop(callback=collector,
                                interval=options.interval)
    sleep_loop.loop_forever()
Allen Li788d1672016-12-16 15:58:23 -0800127
128
# Start the monitor only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()