blob: b2b2349447df9551cd879a02814948ac09450be6 [file] [log] [blame]
xixuanebdb0a82017-04-28 11:25:02 -07001#!/usr/bin/env python2
Congbin Guo87c044e2019-09-09 16:20:47 -07002# -*- coding: utf-8 -*-
Paul Hobbsef4e0702016-06-27 17:01:42 -07003# Copyright 2016 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Script to upload metrics from apache logs to Monarch.
8
9We are interested in static file bandwidth, so it parses out GET requests to
10/static and uploads the sizes to a cumulative metric.
11"""
12from __future__ import print_function
13
14import argparse
Congbin Guoc2b908c2019-09-09 16:24:38 -070015import functools
Paul Hobbs84acd9d2017-09-20 15:10:53 -070016from logging import handlers
Paul Hobbsef4e0702016-06-27 17:01:42 -070017import re
18import sys
19
xixuanebdb0a82017-04-28 11:25:02 -070020# TODO(ayatane): Fix cros lint pylint to work with virtualenv imports
21# pylint: disable=import-error
Paul Hobbsef4e0702016-06-27 17:01:42 -070022
xixuanebdb0a82017-04-28 11:25:02 -070023# only import setup_chromite before chromite import.
24import setup_chromite # pylint: disable=unused-import
Paul Hobbsef4e0702016-06-27 17:01:42 -070025from chromite.lib import ts_mon_config
26from chromite.lib import metrics
Paul Hobbs338baee2016-07-13 13:42:34 -070027from chromite.lib import cros_logging as logging
Paul Hobbsfe0b1c62017-08-18 12:56:14 -070028
# Log rotation parameters. Keep about two weeks of old logs.
#
# These are handed to logging.handlers.TimedRotatingFileHandler as its
# |when|, |interval| and |backupCount| arguments: rotate every 24 hours and
# keep 14 rotated files. For more, see the documentation in standard python
# library for logging.handlers.TimedRotatingFileHandler
_LOG_ROTATION_TIME = 'H'
_LOG_ROTATION_INTERVAL = 24 # hours
_LOG_ROTATION_BACKUP = 14 # backup counts
Paul Hobbsef4e0702016-06-27 17:01:42 -070036
37
# Matcher of successful (HTTP 200) GET requests below /static/, e.g.
# <ipv4 addr> - - [datetime] "GET /static/<path> HTTP/1.1" 200 <size> ...
# |endpoint| captures the path after '/static/'; |size| is the response size
# field as logged (it may be '-').
STATIC_GET_MATCHER = re.compile(
    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
    r'.*GET /static/(?P<endpoint>\S*)[^"]*" '
    r'200 (?P<size>\S+) .*')
# Matcher of all RPC calls log lines, e.g.
# <ipv4 addr> - - [datetime] "GET /list_suite_controls?key=val HTTP/1.1" 200...
#
# |rpc_name| is the first path component (optionally prefixed by 'api/'). It
# must stop at whitespace as well as at '/' and '?': otherwise a request
# without a query string, such as "GET /check_health HTTP/1.1", would be
# captured as 'check_health HTTP' because the match would run up to the '/'
# in 'HTTP/1.1'.
RPC_USAGE_MATCHER = re.compile(
    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
    r'.*"(?P<http_method>\S+) /(?P<rpc_name>(?:api/)?[^/?\s]+)[^"]*" '
    r'2\d\d (?P<size>\S+) .*')

STATIC_GET_METRIC_NAME = 'chromeos/devserver/apache/static_response_size'
DEVSERVER_RPC_USAGE_METRIC_NAME = 'chromeos/devserver/rpc_usage'
Paul Hobbsef4e0702016-06-27 17:01:42 -070051
52
# (base IPv4 address, prefix length) pairs. InLab() reports an address as
# being in the lab when it falls inside any of these subnets. A base address
# need not be aligned to its prefix (e.g. 100.115.254.126/25): MatchesSubnet()
# masks both the candidate and the base before comparing them.
LAB_SUBNETS = (
    ("172.17.40.0", 22),
    ("100.107.160.0", 19),
    ("100.115.128.0", 17),
    ("100.115.254.126", 25),
    ("100.107.141.128", 25),
    ("172.27.212.0", 22),
    ("100.107.156.192", 26),
    ("172.22.29.0", 25),
    ("172.22.38.0", 23),
    ("100.107.224.0", 23),
    ("100.107.226.0", 25),
    ("100.107.126.0", 25),
)
67
def IPToNum(ip):
  """Converts a dotted IPv4 string into the integer it represents.

  Args:
    ip: An IPv4-formatted string, e.g. '10.0.0.1'.

  Returns:
    The integer obtained by reading each dot-separated field as one base-256
    digit, most significant field first.
  """
  value = 0
  for octet in ip.split('.'):
    # Shift what has been accumulated up by one byte, then append this field.
    value = value * 256 + int(octet)
  return value
Paul Hobbsef4e0702016-06-27 17:01:42 -070077
78
def MatchesSubnet(ip, base, mask):
  """Tells whether the address |ip| lies in the subnet |base|/|mask|.

  Args:
    ip: An IPv4 string.
    base: An IPv4 string which is the lowest value in the subnet.
    mask: The number of bits which are not wildcards in the subnet.

  Returns:
    True if |ip| and |base| agree on their leading |mask| bits.
  """
  ip_bits = IPToNum(ip)
  base_bits = IPToNum(base)
  # |mask| ones followed by (32 - |mask|) zeros.
  netmask = (2**mask - 1) << (32 - mask)
  # The XOR has a bit set wherever the two addresses differ; the address is
  # in the subnet when none of those bits fall under the netmask.
  return ((ip_bits ^ base_bits) & netmask) == 0
91
92
def InLab(ip):
  """Tells whether |ip| belongs to one of the ChromeOS Lab subnets.

  Args:
    ip: An IPv4 address to be tested.

  Returns:
    True if |ip| matches any entry of LAB_SUBNETS, False otherwise.
  """
  for base, mask in LAB_SUBNETS:
    if MatchesSubnet(ip, base, mask):
      return True
  return False
101
102
# Matches a milestone token such as 'R70' at the start of a string.
MILESTONE_PATTERN = re.compile(r'R\d+')

# Artifact names that are reported verbatim as the filename label.
FILENAME_CONSTANTS = [
    'stateful.tgz',
    'client-autotest.tar.bz2',
    'chromiumos_test_image.bin',
    'autotest_server_package.tar.bz2',
]

# (compiled regex, label) pairs, tried in order against the last path
# component of a static request; the label of the first hit is reported.
#
# The literal names are passed through re.escape() so that their '.'
# characters only match a real dot rather than any character.
FILENAME_PATTERNS = [(re.compile(re.escape(s)), s)
                     for s in FILENAME_CONSTANTS] + [
    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
     'chromeos_*_delta_test.bin-*'),
    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
     'chromeos_*_full_test.bin-*'),
    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
]
121
122
def MatchAny(needle, patterns, default=''):
  """Returns the value paired with the first pattern matching |needle|.

  Args:
    needle: The string to test.
    patterns: An iterable of (compiled regex, value) pairs, tried in order.
      Each regex is applied with match(), i.e. anchored at the start only.
    default: The value to return when no pattern matches.

  Returns:
    The value of the first matching pair, or |default| if there is none.
  """
  hits = (value for regex, value in patterns if regex.match(needle))
  return next(hits, default)
128
129
def ParseStaticEndpoint(endpoint):
  """Parses a /static/.* URL path into build_config, milestone, and filename.

  Static endpoints are expected to be of the form
      /static/$BUILD_CONFIG/$MILESTONE-$VERSION/$FILENAME

  This function expects the '/static/' prefix to already be stripped off.

  Args:
    endpoint: A string which is the matched URL path after /static/

  Returns:
    A (build_config, milestone, filename) tuple of strings. Any component
    that is absent or not recognised is returned as ''.
  """
  build_config, milestone, filename = [''] * 3

  # split() always yields at least one element, so parts[0] is safe.
  parts = endpoint.split('/')
  build_config = parts[0]

  if len(parts) >= 2:
    # partition() rather than index(): a version segment without a '-' must
    # not raise (index() would throw ValueError and abort log processing).
    candidate, dash, _ = parts[1].partition('-')
    if not dash:
      logging.debug('%s failed to parse: no "-" in version segment "%s"',
                    endpoint, parts[1])
    elif MILESTONE_PATTERN.match(candidate):
      milestone = candidate

  if len(parts) >= 3:
    filename = MatchAny(parts[-1], FILENAME_PATTERNS)

  return build_config, milestone, filename
157
158
def EmitStaticRequestMetric(m):
  """Adds the size of a successful /static GET to the static-size counter.

  Args:
    m: A regex match object providing the 'endpoint', 'size' and 'ip_addr'
      groups.
  """
  build_config, milestone, filename = ParseStaticEndpoint(m.group('endpoint'))

  try:
    response_size = int(m.group('size'))
  except ValueError:
    # Zero is represented by "-"
    response_size = 0

  counter = metrics.Counter(STATIC_GET_METRIC_NAME)
  metric_fields = {
      'build_config': build_config,
      'milestone': milestone,
      'in_lab': InLab(m.group('ip_addr')),
      'endpoint': filename,
  }
  counter.increment_by(response_size, fields=metric_fields)
Paul Hobbsef4e0702016-06-27 17:01:42 -0700178
179
def EmitRpcUsageMetric(m):
  """Emits a Counter metric for successful RPC requests.

  The counter is incremented by the logged response size and is keyed by the
  HTTP method, the RPC name and whether the client is inside the lab.

  Args:
    m: A regex match object providing the 'size', 'http_method', 'rpc_name'
      and 'ip_addr' groups.
  """
  try:
    size = int(m.group('size'))
  except ValueError:  # Zero is represented by "-"
    size = 0

  # Report under the RPC usage metric. Reusing the static-file metric name
  # here would mix RPC data, with a different field set, into the static
  # response size stream.
  # NOTE(review): the increment is the response size, not 1, so responses
  # logged with size "-" add nothing -- confirm this is the intended "usage".
  metrics.Counter(DEVSERVER_RPC_USAGE_METRIC_NAME).increment_by(
      size, fields={
          'http_method': m.group('http_method'),
          'rpc_name': m.group('rpc_name'),
          'in_lab': InLab(m.group('ip_addr')),
      })
197
198
def RunMatchers(stream, matchers):
  """Parses lines of |stream| using patterns and emitters from |matchers|

  Every line is tested against every matcher; a line may therefore trigger
  more than one emitter.

  Args:
    stream: A file object to read from.
    matchers: A list of pairs of (matcher, emitter), where matcher is a regex
      and emitter is a function called when the regex matches.
  """
  # Read with readline() until it returns '' (end of file).
  # NOTE(review): presumably preferred over `for line in stream` to avoid
  # read-ahead buffering when following a live pipe -- confirm.
  for line in iter(stream.readline, ''):
    for matcher, emitter in matchers:
      m = matcher.match(line)
      if not m:
        continue
      # Log only when something is actually emitted; logging before the
      # match test would claim an emission for every line/matcher pair.
      logging.debug('Emitting %s for input "%s"',
                    emitter.__name__, line.strip())
      emitter(m)
Paul Hobbsef4e0702016-06-27 17:01:42 -0700214
215
# (matcher, emitter) pairs handed to RunMatchers() by main(). Each input line
# is tested against every pair, in this order.
# TODO(phobbs) add a matcher for all requests, not just static files.
# NOTE(review): RPC_USAGE_MATCHER already matches 2xx requests to any path,
# so the TODO above may be partly or fully addressed -- confirm and drop it.
MATCHERS = [
    (STATIC_GET_MATCHER, EmitStaticRequestMetric),
    (RPC_USAGE_MATCHER, EmitRpcUsageMetric),
]
221
222
def ParseArgs():
  """Parses the command line of this script.

  Returns:
    The argparse namespace; its |logfile| attribute holds the value of the
    required --logfile option.
  """
  parser = argparse.ArgumentParser(
      description='Parses apache logs and emits metrics to Monarch')
  parser.add_argument('--logfile', required=True)
  parsed = parser.parse_args()
  return parsed
229
230
def main():
  """Sets up logging and runs matchers against stdin.

  The root logger gets a time-rotated file handler writing to --logfile and
  is set to DEBUG; MATCHERS are then run over stdin inside the ts_mon global
  state context.
  """
  args = ParseArgs()
  root_logger = logging.getLogger()

  rotating_handler = handlers.TimedRotatingFileHandler(
      args.logfile,
      when=_LOG_ROTATION_TIME,
      interval=_LOG_ROTATION_INTERVAL,
      backupCount=_LOG_ROTATION_BACKUP)
  root_logger.addHandler(rotating_handler)
  root_logger.setLevel(logging.DEBUG)

  ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
      'devserver_apache_log_metrics', indirect=True)
  with ts_mon_state:
    RunMatchers(sys.stdin, MATCHERS)


if __name__ == '__main__':
  main()