xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 2 | |
| 3 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Script to upload metrics from apache logs to Monarch. |
| 8 | |
| 9 | We are interested in static file bandwidth, so it parses out GET requests to |
| 10 | /static and uploads the sizes to a cumulative metric. |
| 11 | """ |
| 12 | from __future__ import print_function |
| 13 | |
| 14 | import argparse |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 15 | import re |
| 16 | import sys |
| 17 | |
xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 18 | # TODO(ayatane): Fix cros lint pylint to work with virtualenv imports |
| 19 | # pylint: disable=import-error |
| 20 | from devserver_lib.devserver import MakeLogHandler |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 21 | |
xixuan | ebdb0a8 | 2017-04-28 11:25:02 -0700 | [diff] [blame] | 22 | # only import setup_chromite before chromite import. |
| 23 | import setup_chromite # pylint: disable=unused-import |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 24 | from chromite.lib import ts_mon_config |
| 25 | from chromite.lib import metrics |
Paul Hobbs | 338baee | 2016-07-13 13:42:34 -0700 | [diff] [blame] | 26 | from chromite.lib import cros_logging as logging |
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 27 | from infra_libs import ts_mon |
| 28 | |
| 29 | |
# Regex for an access-log line that starts with an IPv4 client address and
# records a GET of a /static/ path answered with status 200. Named groups:
#   ip_addr:  the leading dotted-quad address.
#   endpoint: the non-whitespace text following '/static/'.
#   size:     the token after the 200 status; may be non-numeric (e.g. '-').
STATIC_GET_MATCHER = re.compile(
    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '       # client address
    r'.*GET /static/(?P<endpoint>\S*)[^"]*" '  # request, up to closing quote
    r'200 (?P<size>\S+) .*')                   # status and response size

# Name of the cumulative metric that accumulates the matched sizes.
STATIC_GET_METRIC_NAME = 'chromeos/devserver/apache/static_response_size'
| 36 | |
| 37 | |
# Subnets treated as "in the lab", as (base address, prefix length) pairs.
# A base does not have to be aligned to its prefix (e.g. 100.115.254.126/25):
# MatchesSubnet masks both the base and the candidate before comparing.
LAB_SUBNETS = (
    ('172.17.40.0', 22),
    ('100.107.160.0', 19),
    ('100.115.128.0', 17),
    ('100.115.254.126', 25),
    ('100.107.141.128', 25),
    ('172.27.212.0', 22),
    ('100.107.156.192', 26),
    ('172.22.29.0', 25),
    ('172.22.38.0', 23),
    ('100.107.224.0', 23),
    ('100.107.226.0', 25),
    ('100.107.126.0', 25),
)
| 52 | |
def IPToNum(ip):
  """Returns the integer represented by an IPv4 string.

  The dot-separated components are folded most-significant first, each one
  shifting the running total up by eight bits, so '1.2.3.4' becomes
  0x01020304. This is written as a plain loop instead of `reduce`, which is
  not a builtin on Python 3.

  Args:
    ip: An IPv4-formatted string.

  Returns:
    The address as an integer.

  Raises:
    ValueError: If a dot-separated component is not a base-10 integer.
  """
  value = 0
  for octet in ip.split('.'):
    value = value * 2**8 + int(octet)
  return value
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 62 | |
| 63 | |
def MatchesSubnet(ip, base, mask):
  """Tells whether the address |ip| lies in the subnet |base|/|mask|.

  Args:
    ip: An IPv4 string.
    base: An IPv4 string identifying the subnet. Only its top |mask| bits
      take part in the comparison.
    mask: The number of leading bits which are not wildcards in the subnet.

  Returns:
    True if |ip| and |base| agree on their top |mask| bits, else False.
  """
  addr, network = IPToNum(ip), IPToNum(base)
  # |mask| one-bits followed by (32 - mask) zero-bits.
  netmask = (2**mask - 1) << (32 - mask)
  # The two addresses agree under the netmask iff their XOR has no bit set
  # inside it.
  return not (addr ^ network) & netmask
| 76 | |
| 77 | |
def InLab(ip):
  """Tells whether |ip| falls inside any of the LAB_SUBNETS.

  Args:
    ip: An IPv4 address string to be tested.

  Returns:
    True as soon as one subnet matches; False if none does.
  """
  for subnet_base, prefix_len in LAB_SUBNETS:
    if MatchesSubnet(ip, subnet_base, prefix_len):
      return True
  return False
| 86 | |
| 87 | |
# A milestone label such as 'R52'. Callers apply it with .match(), which
# anchors at the start of the candidate string only.
MILESTONE_PATTERN = re.compile(r'R\d+')

# File names that are recognised literally and used as their own label.
FILENAME_CONSTANTS = [
    'stateful.tgz',
    'client-autotest.tar.bz2',
    'chromiumos_test_image.bin',
    'autotest_server_package.tar.bz2',
]

# (compiled regex, label) pairs, tried in order by MatchAny. The literal names
# are passed through re.escape so that '.' matches only a real dot rather than
# any character. The wildcard families follow, each listed once.
FILENAME_PATTERNS = [
    (re.compile(re.escape(s)), s) for s in FILENAME_CONSTANTS
] + [
    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
     'chromeos_*_delta_test.bin-*'),
    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
     'chromeos_*_full_test.bin-*'),
    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
]
| 106 | |
| 107 | |
def MatchAny(needle, patterns, default=''):
  """Returns the value paired with the first pattern matching |needle|.

  Args:
    needle: The string to test.
    patterns: An iterable of (compiled regex, value) pairs, tried in order.
      Each regex is applied with .match(), i.e. anchored at the start of
      |needle|.
    default: What to return when no pattern matches.

  Returns:
    The value of the first matching pair, or |default|.
  """
  hits = (value for regex, value in patterns if regex.match(needle))
  return next(hits, default)
| 113 | |
| 114 | |
def ParseStaticEndpoint(endpoint):
  """Parses a /static/.* URL path into build_config, milestone, and filename.

  Static endpoints are expected to be of the form
      /static/$BUILD_CONFIG/$MILESTONE-$VERSION/$FILENAME

  This function expects the '/static/' prefix to already be stripped off.

  Paths that do not follow the expected form never raise; the fields that
  cannot be determined are returned as empty strings.

  Args:
    endpoint: A string which is the matched URL path after /static/

  Returns:
    A (build_config, milestone, filename) tuple of strings:
      build_config: The first path component.
      milestone: The text before the first '-' of the second component when
        it matches MILESTONE_PATTERN, otherwise ''.
      filename: The FILENAME_PATTERNS label matching the last component when
        there are at least three components, otherwise ''.
  """
  parts = endpoint.split('/')
  # str.split always yields at least one element, so parts[0] cannot fail.
  build_config = parts[0]
  milestone = ''
  filename = ''

  if len(parts) >= 2:
    version = parts[1]
    # partition() reports a missing '-' through an empty separator instead of
    # raising, so a malformed path cannot abort the caller's log loop.
    candidate, dash, _ = version.partition('-')
    if not dash:
      logging.debug('%s failed to parse. No "-" in version "%s"',
                    endpoint, version)
    elif MILESTONE_PATTERN.match(candidate):
      milestone = candidate

  if len(parts) >= 3:
    filename = MatchAny(parts[-1], FILENAME_PATTERNS)

  return build_config, milestone, filename
| 142 | |
| 143 | |
def EmitStaticRequestMetric(m):
  """Emits a Counter metric for successful GETs to /static endpoints.

  The counter named STATIC_GET_METRIC_NAME is incremented by the response
  size, tagged with fields derived from the request path and client address.

  Args:
    m: A regex match object providing the 'endpoint', 'size' and 'ip_addr'
      groups.
  """
  build_config, milestone, filename = ParseStaticEndpoint(m.group('endpoint'))

  raw_size = m.group('size')
  try:
    size = int(raw_size)
  except ValueError:
    # Zero is represented by "-" in the log.
    size = 0

  counter = metrics.Counter(STATIC_GET_METRIC_NAME)
  counter.increment_by(
      size,
      fields={
          'build_config': build_config,
          'milestone': milestone,
          'in_lab': InLab(m.group('ip_addr')),
          'endpoint': filename,
      })
Paul Hobbs | ef4e070 | 2016-06-27 17:01:42 -0700 | [diff] [blame] | 163 | |
| 164 | |
def RunMatchers(stream, matchers):
  """Parses lines of |stream| using patterns and emitters from |matchers|

  Every line is tried against every matcher; each emitter is called once per
  line that its regex matches. ts_mon is closed when the stream ends and also
  when an emitter raises, before the exception propagates.

  Args:
    stream: A file object to read from.
    matchers: A list of pairs of (matcher, emitter), where matcher is a regex
      and emitter is a function called when the regex matches.
  """
  try:
    # Lines are pulled one at a time with readline() until it returns ''.
    # NOTE(review): presumably this avoids the read-ahead buffering of Python
    # 2 file iteration, so lines arriving on a pipe are handled promptly.
    for line in iter(stream.readline, ''):
      for matcher, emitter in matchers:
        m = matcher.match(line)
        if m:
          # Logged only for a real match, so the log does not claim an
          # emission for lines that were skipped.
          logging.debug('Emitting %s for input "%s"',
                        emitter.__name__, line.strip())
          emitter(m)
  finally:
    # The input might terminate if the log gets rotated. Make sure that
    # Monarch flushes any pending metrics before quitting, including when an
    # emitter fails.
    ts_mon.close()
| 183 | |
| 184 | |
# TODO(phobbs) add a matcher for all requests, not just static files.
# Each entry pairs a compiled regex with the function that emits a metric for
# the lines that regex matches.
MATCHERS = [(STATIC_GET_MATCHER, EmitStaticRequestMetric)]
| 189 | |
| 190 | |
def ParseArgs():
  """Parses command line arguments.

  Returns:
    The argparse namespace; its `logfile` attribute holds the value of the
    required --logfile flag.
  """
  parser = argparse.ArgumentParser(
      description='Parses apache logs and emits metrics to Monarch')
  parser.add_argument('--logfile', required=True)
  parsed = parser.parse_args()
  return parsed
| 197 | |
| 198 | |
def main():
  """Sets up logging and runs matchers against stdin.

  The root logger is set to DEBUG and given the handler built from the
  --logfile argument, ts_mon global state is set up, and then stdin is run
  through MATCHERS.
  """
  options = ParseArgs()

  root_logger = logging.getLogger()
  log_handler = MakeLogHandler(options.logfile)
  root_logger.addHandler(log_handler)
  root_logger.setLevel(logging.DEBUG)

  ts_mon_config.SetupTsMonGlobalState('devserver_apache_log_metrics')
  RunMatchers(sys.stdin, MATCHERS)


if __name__ == '__main__':
  main()