xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 2 | |
| 3 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Script to upload metrics from apache logs to Monarch. |
| 8 | |
| 9 | We are interested in static file bandwidth, so it parses out GET requests to |
| 10 | /static and uploads the sizes to a cumulative metric. |
| 11 | """ |
| 12 | from __future__ import print_function |
| 13 | |
| 14 | import argparse |
Congbin Guo | c2b908c | 2019-09-09 16:24:38 -0700 | [diff] [blame^] | 15 | import functools |
Paul Hobbs | 84acd9d | 2017-09-20 15:10:53 -0700 | [diff] [blame] | 16 | from logging import handlers |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 17 | import re |
| 18 | import sys |
| 19 | |
xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 20 | # TODO(ayatane): Fix cros lint pylint to work with virtualenv imports |
| 21 | # pylint: disable=import-error |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 22 | |
xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 23 | # only import setup_chromite before chromite import. |
| 24 | import setup_chromite # pylint: disable=unused-import |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 25 | from chromite.lib import ts_mon_config |
| 26 | from chromite.lib import metrics |
Paul Hobbs | 338baee | 2016-07-13 13:42:34 -0700 | [diff] [blame] | 27 | from chromite.lib import cros_logging as logging |
Paul Hobbs | fe0b1c6 | 2017-08-18 12:56:14 -0700 | [diff] [blame] | 28 | |
| 29 | |
# Rotation settings for this script's own log file.  They are handed directly
# to logging.handlers.TimedRotatingFileHandler; see the standard library
# documentation of that class for the meaning of each value.
#
# Rotating every 24 hours and keeping 14 rotated files retains about two weeks
# of old logs.
_LOG_ROTATION_TIME = 'H'  # rotation unit: hours
_LOG_ROTATION_INTERVAL = 24  # units between rotations
_LOG_ROTATION_BACKUP = 14  # number of rotated files to keep
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 37 | |
| 38 | |
# Matches an access-log line for a GET of /static/... answered with status 200.
# Named groups:
#   ip_addr:  the dotted-quad address at the start of the line.
#   endpoint: the URL path following '/static/' (up to the first whitespace).
#   size:     the token following the 200 status; may be non-numeric.
STATIC_GET_MATCHER = re.compile(
    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
    r'.*GET /static/(?P<endpoint>\S*)[^"]*" '
    r'200 (?P<size>\S+) .*'
)

# Name of the counter metric that accumulates the matched response sizes.
STATIC_GET_METRIC_NAME = 'chromeos/devserver/apache/static_response_size'
| 45 | |
| 46 | |
| 47 | LAB_SUBNETS = ( |
| 48 | ("172.17.40.0", 22), |
| 49 | ("100.107.160.0", 19), |
| 50 | ("100.115.128.0", 17), |
| 51 | ("100.115.254.126", 25), |
| 52 | ("100.107.141.128", 25), |
| 53 | ("172.27.212.0", 22), |
| 54 | ("100.107.156.192", 26), |
| 55 | ("172.22.29.0", 25), |
| 56 | ("172.22.38.0", 23), |
| 57 | ("100.107.224.0", 23), |
| 58 | ("100.107.226.0", 25), |
| 59 | ("100.107.126.0", 25), |
| 60 | ) |
| 61 | |
def IPToNum(ip):
  """Converts a dotted IPv4 string into the integer it represents.

  Each dot-separated component is parsed with int() and appended as the next
  (least significant) byte of the result.

  Args:
    ip: An IPv4-formatted string.

  Returns:
    The integer value of |ip|.

  Raises:
    ValueError: If a dot-separated component is not an integer literal.
  """
  value = 0
  for octet in ip.split('.'):
    # Shift what we have so far up by one byte, then add the new component.
    value = value * 2**8 + int(octet)
  return value
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 71 | |
| 72 | |
def MatchesSubnet(ip, base, mask):
  """Tells whether the address |ip| lies in the subnet |base|/|mask|.

  Args:
    ip: An IPv4 string.
    base: An IPv4 string identifying the subnet.  Only its leading |mask|
        bits take part in the comparison.
    mask: The number of leading bits which are not wildcards in the subnet.

  Returns:
    True if the leading |mask| bits of |ip| and |base| agree, else False.
  """
  ip_value = IPToNum(ip)
  base_value = IPToNum(base)
  # |mask| one-bits followed by (32 - mask) zero-bits.
  netmask = (2**mask - 1) << (32 - mask)
  # Bits that differ between the two addresses, restricted to the fixed part.
  differing_bits = (ip_value ^ base_value) & netmask
  return differing_bits == 0
| 85 | |
| 86 | |
def InLab(ip):
  """Tells whether |ip| belongs to any of the subnets in LAB_SUBNETS.

  Args:
    ip: An IPv4 address string to be tested.

  Returns:
    True if at least one LAB_SUBNETS entry matches |ip|, otherwise False.
  """
  for subnet_base, prefix_bits in LAB_SUBNETS:
    if MatchesSubnet(ip, subnet_base, prefix_bits):
      return True
  return False
| 95 | |
| 96 | |
# An 'R' followed by one or more digits, e.g. "R52".
MILESTONE_PATTERN = re.compile(r'R\d+')

# File names that are reported under their own literal name.
FILENAME_CONSTANTS = [
    'stateful.tgz',
    'client-autotest.tar.bz2',
    'chromiumos_test_image.bin',
    'autotest_server_package.tar.bz2',
]

# Pairs of (compiled regex, label).  The first regex that matches a file name
# (see MatchAny) decides the label that is reported for it.
#
# The literal names are passed through re.escape() so that their dots match
# only a literal '.', rather than any character.
FILENAME_PATTERNS = [
    (re.compile(re.escape(s)), s) for s in FILENAME_CONSTANTS
] + [
    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
     'chromeos_*_delta_test.bin-*'),
    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
     'chromeos_*_full_test.bin-*'),
    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
]
| 115 | |
| 116 | |
def MatchAny(needle, patterns, default=''):
  """Returns the value paired with the first pattern that matches |needle|.

  Args:
    needle: The string to test.
    patterns: An iterable of (compiled regex, value) pairs, tried in order
        with regex.match(), i.e. anchored at the start of |needle|.
    default: What to return when no pattern matches.

  Returns:
    The value of the first matching pair, or |default| if there is none.
  """
  matching_values = (
      value for pattern, value in patterns if pattern.match(needle))
  return next(matching_values, default)
| 122 | |
| 123 | |
def ParseStaticEndpoint(endpoint):
  """Parses a /static/.* URL path into build_config, milestone, and filename.

  Static endpoints are expected to be of the form
      /static/$BUILD_CONFIG/$MILESTONE-$VERSION/$FILENAME

  This function expects the '/static/' prefix to already be stripped off.

  Paths that do not follow that form never raise; any component that cannot
  be determined is returned as the empty string.

  Args:
    endpoint: A string which is the matched URL path after /static/

  Returns:
    A (build_config, milestone, filename) tuple of strings where
      build_config is the first path component,
      milestone is the part of the second component before its first '-' if
          that part matches MILESTONE_PATTERN, else '',
      filename is the FILENAME_PATTERNS label for the last path component
          when there are at least three components and one pattern matches,
          else ''.
  """
  milestone = ''
  filename = ''

  # str.split() always yields at least one element, so parts[0] is safe.
  parts = endpoint.split('/')
  build_config = parts[0]

  if len(parts) >= 2:
    version = parts[1]
    # partition() is used instead of index(): index() raises ValueError when
    # there is no '-', which would abort processing of the whole log stream.
    candidate, dash, _ = version.partition('-')
    if not dash:
      logging.debug('%s failed to parse. No "-" in version "%s"',
                    endpoint, version)
    elif MILESTONE_PATTERN.match(candidate):
      milestone = candidate

  if len(parts) >= 3:
    filename = MatchAny(parts[-1], FILENAME_PATTERNS)

  return build_config, milestone, filename
| 151 | |
| 152 | |
def EmitStaticRequestMetric(m):
  """Emits a Counter metric for successful GETs to /static endpoints.

  The counter named STATIC_GET_METRIC_NAME is incremented by the response
  size, tagged with the build config, milestone and file label parsed from the
  endpoint and with whether the client address is in the lab.

  Args:
    m: A regex match object providing the groups 'endpoint', 'size' and
        'ip_addr'.
  """
  build_config, milestone, filename = ParseStaticEndpoint(m.group('endpoint'))

  raw_size = m.group('size')
  try:
    size = int(raw_size)
  except ValueError:  # Zero is represented by "-"
    size = 0

  counter = metrics.Counter(STATIC_GET_METRIC_NAME)
  fields = {
      'build_config': build_config,
      'milestone': milestone,
      'in_lab': InLab(m.group('ip_addr')),
      'endpoint': filename,
  }
  counter.increment_by(size, fields=fields)
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 172 | |
| 173 | |
def RunMatchers(stream, matchers):
  """Reads |stream| line by line and dispatches matching lines to emitters.

  Lines are fetched one at a time with stream.readline() until it returns the
  empty string.  For every line, each (matcher, emitter) pair is tried in
  order.  Note that the debug log entry is written for every pair, before the
  regex is tested, so it appears whether or not the line matches.

  Args:
    stream: A file object to read from.
    matchers: A list of pairs of (matcher, emitter), where matcher is a regex
        and emitter is a function called with the match object when the regex
        matches.
  """
  while True:
    line = stream.readline()
    if line == '':
      # readline() returns '' only at end of input; blank lines are '\n'.
      break
    for matcher, emitter in matchers:
      logging.debug('Emitting %s for input "%s"',
                    emitter.__name__, line.strip())
      found = matcher.match(line)
      if found:
        emitter(found)
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 189 | |
| 190 | |
# The (regex, emitter) pairs applied to every log line by RunMatchers().
# TODO(phobbs) add a matcher for all requests, not just static files.
MATCHERS = [(STATIC_GET_MATCHER, EmitStaticRequestMetric)]
| 195 | |
| 196 | |
def ParseArgs():
  """Builds the command line parser and parses the process arguments.

  Returns:
    The argparse namespace.  Its |logfile| attribute holds the value of the
    required --logfile option.
  """
  parser = argparse.ArgumentParser(
      description='Parses apache logs and emits metrics to Monarch')
  parser.add_argument('--logfile', required=True)
  options = parser.parse_args()
  return options
| 203 | |
| 204 | |
def main():
  """Sets up logging and runs matchers against stdin.

  The root logger is set to DEBUG and writes to the file given by --logfile
  through a TimedRotatingFileHandler configured with the _LOG_ROTATION_*
  constants.  Lines from stdin are then run through MATCHERS inside the
  ts_mon context created by SetupTsMonGlobalState.
  """
  args = ParseArgs()
  root_logger = logging.getLogger()

  rotating_handler = handlers.TimedRotatingFileHandler(
      args.logfile,
      when=_LOG_ROTATION_TIME,
      interval=_LOG_ROTATION_INTERVAL,
      backupCount=_LOG_ROTATION_BACKUP)
  root_logger.addHandler(rotating_handler)
  root_logger.setLevel(logging.DEBUG)

  ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
      'devserver_apache_log_metrics', indirect=True)
  with ts_mon_state:
    RunMatchers(sys.stdin, MATCHERS)


if __name__ == '__main__':
  main()