Gs Cache: nginx log metrics
In this change, we add a metric to monitor Nginx access log file for Gs
Cache service. It allow us to know what's the cache status, HIT/MISS
etc.
BUG=chromium:824580
TEST=Ran unit tests.
Change-Id: Ie4de6444f84700d053dd7a53a76955ce6d12d8b7
Reviewed-on: https://chromium-review.googlesource.com/1168399
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: Congbin Guo <guocb@chromium.org>
Reviewed-by: Prathmesh Prabhu <pprabhu@chromium.org>
diff --git a/bin/gs_cache_nginx_metrics b/bin/gs_cache_nginx_metrics
new file mode 100755
index 0000000..e7a49b4
--- /dev/null
+++ b/bin/gs_cache_nginx_metrics
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Run Google Storage archive server from inside virtual environment.
+set -eu
+readonly bindir=$(dirname -- "$(readlink -e -- "$0")")
+readonly homedir=$(cd "$bindir"/../gs_cache; pwd)
+export PYTHONPATH=$homedir
+
+exec vpython -vpython-spec $homedir/.vpython -m nginx_access_log_metrics "$@"
diff --git a/gs_cache/nginx_access_log_metrics.py b/gs_cache/nginx_access_log_metrics.py
new file mode 100644
index 0000000..8ec0ce8
--- /dev/null
+++ b/gs_cache/nginx_access_log_metrics.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script generate monarch metrics from Gs Cache server logs.
+
+It parses the Nginx access log file of Gs Cache, e.g.
+/var/log/nginx/gs-cache-server.access.log, and generate monarch metrics for Gs
+Cache performance.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import re
+import sys
+from logging import handlers
+
+from chromite.lib import cros_logging as logging
+from chromite.lib import metrics
+from chromite.lib import ts_mon_config
+
+_LOG_ROTATION_TIME = 'H'
+_LOG_ROTATION_INTERVAL = 24 # hours
+_LOG_ROTATION_BACKUP = 14 # Keep for 14 days.
+
+_METRIC_NAME = 'chromeos/gs_cache/nginx/response'
+
+# An example Nginx access log line is:
+# <ip> 2018-07-26T18:13:49-07:00 "GET URL HTTP/1.1" 200 <size> "<agent>" HIT
+_SUCCESS_RESPONSE_MATCHER = re.compile(
+ r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
+ r'(?P<timestamp>\d+\-\d+\-\d+T\d+:\d+:\d+[+\-]\d+:\d+) '
+ r'"GET (?P<url_path>\S*)[^"]*" '
+ r'2\d\d (?P<size>\S+) "[^"]+" (?P<cache_status>\S+)')
+
+# The format of URL is like:
+# /$ACTION/$BUCKET/$BUILD/$MILESTONE-$VERSION/$FILENAME?params
+# We want ACTION, BUCKET, BUILD, MILESTONE, and FILENAME.
+_URL_PATH_MATCHER = re.compile(
+ r'^/(?P<action>[^/]+)/(?P<bucket>[^/]+)/(?P<build>[^/]+)/'
+ r'(?P<milestone>\S+)\-(?P<version>[^/]+)/(?P<filename>[^\?]+)'
+)
+
+
+def emit_successful_response_metric(m):
+ """Emit a Counter metric for a successful response.
+
+ We parse the response line and uploads the size of response to a cumulative
+ metric. Especially, the metric has a designate field of 'cache' which
+ indicates the upstream cache status, e.g. HIT/MISS etc. For all list of cache
+ status, see
+ http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cache_status
+
+ Args:
+ m: A regex match object or None.
+ """
+ if not m:
+ return
+
+ # Ignore all loopback calls between gs_archive_server and Nginx.
+ if m.group('ip_addr') == '127.0.0.1':
+ return
+
+ logging.debug('Emitting successful response metric.')
+ metric_fields = {
+ 'cache': m.group('cache_status'),
+ 'action': '',
+ 'bucket': '',
+ 'build': '',
+ 'milestone': '',
+ 'version': '',
+ 'endpoint': '',
+ }
+ requested_file_info = _URL_PATH_MATCHER.match(m.group('url_path'))
+ if requested_file_info:
+ metric_fields.update({
+ 'action': requested_file_info.group('action'),
+ 'bucket': requested_file_info.group('bucket'),
+ 'build': requested_file_info.group('build'),
+ 'milestone': requested_file_info.group('milestone'),
+ 'version': requested_file_info.group('version'),
+ 'endpoint': requested_file_info.group('filename'),
+ })
+ metrics.Counter(_METRIC_NAME).increment_by(int(m.group('size')),
+ fields=metric_fields)
+
+
+def input_log_file_type(filename):
+ """A argparse type function converting input filename to file object.
+
+ It converts '-' to sys.stdin.
+ """
+ if filename == '-':
+ return sys.stdin
+ return argparse.FileType('r')(filename)
+
+
+def parse_args(argv):
+ """Parses command line arguments."""
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument(
+ "-i", "--input-log-file", metavar='NGINX_ACCESS_LOG_FILE',
+ dest='input_fd', required=True,
+ type=input_log_file_type,
+ help=("Nginx log file of Gs Cache "
+ "(use '-' to indicate reading from sys.stdin).")
+ )
+ parser.add_argument(
+ "-l", "--log-file", required=True,
+ help="Log file of this script."
+ )
+ return parser.parse_args(argv)
+
+
+def main(argv):
+ """Main function."""
+ args = parse_args(argv)
+
+ logger = logging.getLogger()
+ logger.addHandler(handlers.TimedRotatingFileHandler(
+ args.log_file, when=_LOG_ROTATION_TIME,
+ interval=_LOG_ROTATION_INTERVAL,
+ backupCount=_LOG_ROTATION_BACKUP))
+ logger.setLevel(logging.DEBUG)
+
+ with ts_mon_config.SetupTsMonGlobalState('gs_cache_nginx_log_metrics',
+ indirect=True):
+ for line in args.input_fd:
+ logging.debug('Parsing line: %s', line.strip())
+ emit_successful_response_metric(_SUCCESS_RESPONSE_MATCHER.match(line))
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/gs_cache/tests/nginx_access_log_metrics_test.py b/gs_cache/tests/nginx_access_log_metrics_test.py
new file mode 100644
index 0000000..b854a19
--- /dev/null
+++ b/gs_cache/tests/nginx_access_log_metrics_test.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit test script for nginx_access_log_metrics."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import mock
+
+import nginx_access_log_metrics
+
+
+# pylint: disable=protected-access
+class TestMetric(unittest.TestCase):
+ """Test class for nginx_access_log_metrics."""
+
+ def test_match_the_log_line(self):
+ """Test the regex to match a target log line."""
+ match = nginx_access_log_metrics._SUCCESS_RESPONSE_MATCHER.match(
+ '100.109.169.118 2018-08-01T09:11:27-07:00 "GET a_url HTTP/1.1" '
+ '200 12345 "agent/1.2.3" HIT')
+ self.assertTrue(match)
+
+ self.assertEqual(match.group('ip_addr'), '100.109.169.118')
+ self.assertEqual(match.group('url_path'), 'a_url')
+ self.assertEqual(match.group('size'), '12345')
+ self.assertEqual(match.group('cache_status'), 'HIT')
+
+ def test_match_URL_path(self):
+ """Test the regex to match a URL path."""
+ url = '/extract/a_bucket/a-release/R1-2.3/archive'
+ # The match works for URL has or hasn't parameter.
+ for u in [url, url + '?key=value']:
+ match = nginx_access_log_metrics._URL_PATH_MATCHER.match(u)
+
+ self.assertEqual(match.group('action'), 'extract')
+ self.assertEqual(match.group('bucket'), 'a_bucket')
+ self.assertEqual(match.group('build'), 'a-release')
+ self.assertEqual(match.group('milestone'), 'R1')
+ self.assertEqual(match.group('version'), '2.3')
+ self.assertEqual(match.group('filename'), 'archive')
+
+ def test_emitter(self):
+ """Test the emitter."""
+ match = nginx_access_log_metrics._SUCCESS_RESPONSE_MATCHER.match(
+ '100.109.169.118 2018-08-01T09:11:27-07:00 "GET '
+ '/extract/a_bucket/build/R1-2.3/archive HTTP/1.1" '
+ '200 12345 "agent/1.2.3" HIT')
+ # Calling the emitter should not raise any exceptions (for example, by
+ # referencing regex match groups that don't exist.
+ with mock.patch.object(nginx_access_log_metrics, 'metrics') as m:
+ nginx_access_log_metrics.emit_successful_response_metric(match)
+ m.Counter.assert_called_with(nginx_access_log_metrics._METRIC_NAME)