blob: c5a408f58102a8815b3af983d2b69f0aee0e7bce [file] [log] [blame]
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Allen Li788d1672016-12-16 15:58:23 -08005"""Send system monitoring data to the timeseries monitoring API."""
Allen Liec5beb32016-09-08 15:31:41 -07006
Allen Li13bdf0c2017-03-02 15:18:16 -08007from __future__ import absolute_import
Allen Liec5beb32016-09-08 15:31:41 -07008from __future__ import print_function
9
Allen Li788d1672016-12-16 15:58:23 -080010import random
11import time
Allen Liec5beb32016-09-08 15:31:41 -070012
Allen Li788d1672016-12-16 15:58:23 -080013import psutil
14
15from chromite.lib import commandline
Allen Liec5beb32016-09-08 15:31:41 -070016from chromite.lib import cros_logging as logging
Allen Li788d1672016-12-16 15:58:23 -080017from chromite.lib import metrics
18from chromite.lib import ts_mon_config
Allen Li325c0762017-03-02 15:00:19 -080019from infra_libs.ts_mon.common import interface
20
Allen Li24bf8182017-03-02 16:41:20 -080021from chromite.scripts.sysmon import git_metrics
Allen Libaa422d2016-12-16 18:21:10 -080022from chromite.scripts.sysmon import loop
Allen Li325c0762017-03-02 15:00:19 -080023from chromite.scripts.sysmon import net_metrics
Allen Libaa422d2016-12-16 18:21:10 -080024from chromite.scripts.sysmon import osinfo_metrics
Allen Li79317bb2016-12-16 18:25:07 -080025from chromite.scripts.sysmon import prod_metrics
Allen Li788d1672016-12-16 15:58:23 -080026from chromite.scripts.sysmon import puppet_metrics
27from chromite.scripts.sysmon import system_metrics
Allen Liec5beb32016-09-08 15:31:41 -070028
Allen Li788d1672016-12-16 15:58:23 -080029logger = logging.getLogger(__name__)
30
31
Allen Libaa422d2016-12-16 18:21:10 -080032class _MetricCollector(object):
Allen Li38de6412016-12-16 16:52:45 -080033 """Metric collector class."""
34
Allen Li79317bb2016-12-16 18:25:07 -080035 def __init__(self, collect_prod_hosts=False):
Allen Li45ae8392017-03-02 14:19:35 -080036 self._collect_osinfo = _TimedCallback(
37 callback=osinfo_metrics.collect_os_info,
Allen Libaa422d2016-12-16 18:21:10 -080038 interval=60 * 60)
Allen Li79317bb2016-12-16 18:25:07 -080039 if collect_prod_hosts:
Allen Li867d4582017-05-24 18:00:43 -070040 logger.info(u'Enabling prod host metric collection.')
Allen Li45ae8392017-03-02 14:19:35 -080041 self._collect_prod_hosts = _TimedCallback(
42 callback=prod_metrics.collect_prod_hosts,
Allen Li79317bb2016-12-16 18:25:07 -080043 interval=10 * 60)
44 else:
Allen Li45ae8392017-03-02 14:19:35 -080045 self._collect_prod_hosts = lambda: None
Allen Li38de6412016-12-16 16:52:45 -080046
47 def __call__(self):
48 """Collect metrics."""
Allen Li45ae8392017-03-02 14:19:35 -080049 system_metrics.collect_uptime()
50 system_metrics.collect_cpu_info()
51 system_metrics.collect_disk_info()
52 system_metrics.collect_mem_info()
Allen Li325c0762017-03-02 15:00:19 -080053 net_metrics.collect_net_info()
Allen Li45ae8392017-03-02 14:19:35 -080054 system_metrics.collect_proc_info()
55 system_metrics.collect_load_avg()
56 puppet_metrics.collect_puppet_summary()
Allen Li24bf8182017-03-02 16:41:20 -080057 git_metrics.collect_git_metrics()
Allen Li45ae8392017-03-02 14:19:35 -080058 self._collect_prod_hosts()
59 self._collect_osinfo()
60 system_metrics.collect_unix_time() # must be just before flush
Allen Li38de6412016-12-16 16:52:45 -080061 metrics.Flush()
62
63 @property
64 def _next_osinfo_collection(self):
65 return self._last_osinfo_collection + (60 * 60)
Allen Li788d1672016-12-16 15:58:23 -080066
67
Allen Libaa422d2016-12-16 18:21:10 -080068class _TimedCallback(object):
69 """Limits callback to one call in a given interval."""
70
71 def __init__(self, callback, interval):
72 """Initialize instance.
73
74 Args:
75 callback: function to call
76 interval: Number of seconds between allowed calls
77 """
78 self._callback = callback
79 self._interval = interval
80 self._last_called = time.time() - interval
81
82 def __call__(self):
83 if time.time() > self._next_call:
84 self._callback()
85 self._last_called = time.time()
86
87 @property
88 def _next_call(self):
89 return self._last_called + self._interval
90
91
def main():
  """Parse options, configure ts_mon and run the metric collection loop."""
  arg_parser = commandline.ArgumentParser(
      description=__doc__,
      default_log_level='DEBUG')
  arg_parser.add_argument(
      '--interval',
      default=60,
      type=int,
      help='time (in seconds) between sampling system metrics')
  arg_parser.add_argument(
      '--collect-prod-hosts',
      action='store_true',
      help='Enable collection of prod host metrics, like roles')
  options = arg_parser.parse_args()
  options.Freeze()

  # The first call to this function returns a 0 value, so make that call
  # here and throw the result away.
  psutil.cpu_times_percent()

  # Stagger startup by a random delay of up to one interval, in case
  # sysmon is started at exactly the same time on all machines.
  startup_delay = random.uniform(0, options.interval)
  time.sleep(startup_delay)

  # The context manager returned here doesn't do anything, so it is not
  # kept.
  ts_mon_config.SetupTsMonGlobalState('sysmon', auto_flush=False)
  # Extend the default prefix, which is '/chrome/infra/'.
  default_prefix = interface.state.metric_name_prefix
  interface.state.metric_name_prefix = default_prefix + 'chromeos/sysmon/'

  metric_collector = _MetricCollector(
      collect_prod_hosts=options.collect_prod_hosts)
  sleep_loop = loop.SleepLoop(callback=metric_collector,
                              interval=options.interval)
  sleep_loop.loop_forever()
Allen Li788d1672016-12-16 15:58:23 -0800126
127
# Run the monitoring loop only when executed as a script, not on import.
if __name__ == '__main__':
  main()