Gs Cache: nginx log metrics

In this change, we add a metric to monitor Nginx access log file for Gs
Cache service. It allow us to know what's the cache status, HIT/MISS
etc.

BUG=chromium:824580
TEST=Ran unit tests.

Change-Id: Ie4de6444f84700d053dd7a53a76955ce6d12d8b7
Reviewed-on: https://chromium-review.googlesource.com/1168399
Commit-Ready: ChromeOS CL Exonerator Bot <chromiumos-cl-exonerator@appspot.gserviceaccount.com>
Tested-by: Congbin Guo <guocb@chromium.org>
Reviewed-by: Prathmesh Prabhu <pprabhu@chromium.org>
diff --git a/bin/gs_cache_nginx_metrics b/bin/gs_cache_nginx_metrics
new file mode 100755
index 0000000..e7a49b4
--- /dev/null
+++ b/bin/gs_cache_nginx_metrics
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+# Run Google Storage archive server from inside virtual environment.
+set -eu
+readonly bindir=$(dirname -- "$(readlink -e -- "$0")")
+readonly homedir=$(cd "$bindir"/../gs_cache; pwd)
+export PYTHONPATH=$homedir
+
+exec vpython -vpython-spec $homedir/.vpython -m nginx_access_log_metrics "$@"
diff --git a/gs_cache/nginx_access_log_metrics.py b/gs_cache/nginx_access_log_metrics.py
new file mode 100644
index 0000000..8ec0ce8
--- /dev/null
+++ b/gs_cache/nginx_access_log_metrics.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This script generate monarch metrics from Gs Cache server logs.
+
+It parses the Nginx access log file of Gs Cache, e.g.
+/var/log/nginx/gs-cache-server.access.log, and generate monarch metrics for Gs
+Cache performance.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import re
+import sys
+from logging import handlers
+
+from chromite.lib import cros_logging as logging
+from chromite.lib import metrics
+from chromite.lib import ts_mon_config
+
+_LOG_ROTATION_TIME = 'H'
+_LOG_ROTATION_INTERVAL = 24  # hours
+_LOG_ROTATION_BACKUP = 14  # Keep for 14 days.
+
+_METRIC_NAME = 'chromeos/gs_cache/nginx/response'
+
+# An example Nginx access log line is:
+# <ip> 2018-07-26T18:13:49-07:00 "GET URL HTTP/1.1" 200 <size> "<agent>" HIT
+_SUCCESS_RESPONSE_MATCHER = re.compile(
+    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
+    r'(?P<timestamp>\d+\-\d+\-\d+T\d+:\d+:\d+[+\-]\d+:\d+) '
+    r'"GET (?P<url_path>\S*)[^"]*" '
+    r'2\d\d (?P<size>\S+) "[^"]+" (?P<cache_status>\S+)')
+
+# The format of URL is like:
+#   /$ACTION/$BUCKET/$BUILD/$MILESTONE-$VERSION/$FILENAME?params
+# We want ACTION, BUCKET, BUILD, MILESTONE, and FILENAME.
+_URL_PATH_MATCHER = re.compile(
+    r'^/(?P<action>[^/]+)/(?P<bucket>[^/]+)/(?P<build>[^/]+)/'
+    r'(?P<milestone>\S+)\-(?P<version>[^/]+)/(?P<filename>[^\?]+)'
+)
+
+
+def emit_successful_response_metric(m):
+  """Emit a Counter metric for a successful response.
+
+  We parse the response line and uploads the size of response to a cumulative
+  metric. Especially, the metric has a designate field of 'cache' which
+  indicates the upstream cache status, e.g. HIT/MISS etc. For all list of cache
+  status, see
+  http://nginx.org/en/docs/http/ngx_http_upstream_module.html#var_upstream_cache_status
+
+  Args:
+    m: A regex match object or None.
+  """
+  if not m:
+    return
+
+  # Ignore all loopback calls between gs_archive_server and Nginx.
+  if m.group('ip_addr') == '127.0.0.1':
+    return
+
+  logging.debug('Emitting successful response metric.')
+  metric_fields = {
+      'cache': m.group('cache_status'),
+      'action': '',
+      'bucket': '',
+      'build': '',
+      'milestone': '',
+      'version': '',
+      'endpoint': '',
+  }
+  requested_file_info = _URL_PATH_MATCHER.match(m.group('url_path'))
+  if requested_file_info:
+    metric_fields.update({
+        'action': requested_file_info.group('action'),
+        'bucket': requested_file_info.group('bucket'),
+        'build': requested_file_info.group('build'),
+        'milestone': requested_file_info.group('milestone'),
+        'version': requested_file_info.group('version'),
+        'endpoint': requested_file_info.group('filename'),
+    })
+  metrics.Counter(_METRIC_NAME).increment_by(int(m.group('size')),
+                                             fields=metric_fields)
+
+
+def input_log_file_type(filename):
+  """A argparse type function converting input filename to file object.
+
+  It converts '-' to sys.stdin.
+  """
+  if filename == '-':
+    return sys.stdin
+  return argparse.FileType('r')(filename)
+
+
+def parse_args(argv):
+  """Parses command line arguments."""
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument(
+      "-i", "--input-log-file", metavar='NGINX_ACCESS_LOG_FILE',
+      dest='input_fd', required=True,
+      type=input_log_file_type,
+      help=("Nginx log file of Gs Cache "
+            "(use '-' to indicate reading from sys.stdin).")
+  )
+  parser.add_argument(
+      "-l", "--log-file", required=True,
+      help="Log file of this script."
+  )
+  return parser.parse_args(argv)
+
+
+def main(argv):
+  """Main function."""
+  args = parse_args(argv)
+
+  logger = logging.getLogger()
+  logger.addHandler(handlers.TimedRotatingFileHandler(
+      args.log_file, when=_LOG_ROTATION_TIME,
+      interval=_LOG_ROTATION_INTERVAL,
+      backupCount=_LOG_ROTATION_BACKUP))
+  logger.setLevel(logging.DEBUG)
+
+  with ts_mon_config.SetupTsMonGlobalState('gs_cache_nginx_log_metrics',
+                                           indirect=True):
+    for line in args.input_fd:
+      logging.debug('Parsing line: %s', line.strip())
+      emit_successful_response_metric(_SUCCESS_RESPONSE_MATCHER.match(line))
+
+
+if __name__ == "__main__":
+  sys.exit(main(sys.argv[1:]))
diff --git a/gs_cache/tests/nginx_access_log_metrics_test.py b/gs_cache/tests/nginx_access_log_metrics_test.py
new file mode 100644
index 0000000..b854a19
--- /dev/null
+++ b/gs_cache/tests/nginx_access_log_metrics_test.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+# Copyright 2018 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit test script for nginx_access_log_metrics."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import unittest
+
+import mock
+
+import nginx_access_log_metrics
+
+
+# pylint: disable=protected-access
+class TestMetric(unittest.TestCase):
+  """Test class for nginx_access_log_metrics."""
+
+  def test_match_the_log_line(self):
+    """Test the regex to match a target log line."""
+    match = nginx_access_log_metrics._SUCCESS_RESPONSE_MATCHER.match(
+        '100.109.169.118 2018-08-01T09:11:27-07:00 "GET a_url HTTP/1.1" '
+        '200 12345 "agent/1.2.3" HIT')
+    self.assertTrue(match)
+
+    self.assertEqual(match.group('ip_addr'), '100.109.169.118')
+    self.assertEqual(match.group('url_path'), 'a_url')
+    self.assertEqual(match.group('size'), '12345')
+    self.assertEqual(match.group('cache_status'), 'HIT')
+
+  def test_match_URL_path(self):
+    """Test the regex to match a URL path."""
+    url = '/extract/a_bucket/a-release/R1-2.3/archive'
+    # The match works for URL has or hasn't parameter.
+    for u in [url, url + '?key=value']:
+      match = nginx_access_log_metrics._URL_PATH_MATCHER.match(u)
+
+      self.assertEqual(match.group('action'), 'extract')
+      self.assertEqual(match.group('bucket'), 'a_bucket')
+      self.assertEqual(match.group('build'), 'a-release')
+      self.assertEqual(match.group('milestone'), 'R1')
+      self.assertEqual(match.group('version'), '2.3')
+      self.assertEqual(match.group('filename'), 'archive')
+
+  def test_emitter(self):
+    """Test the emitter."""
+    match = nginx_access_log_metrics._SUCCESS_RESPONSE_MATCHER.match(
+        '100.109.169.118 2018-08-01T09:11:27-07:00 "GET '
+        '/extract/a_bucket/build/R1-2.3/archive HTTP/1.1" '
+        '200 12345 "agent/1.2.3" HIT')
+    # Calling the emitter should not raise any exceptions (for example, by
+    # referencing regex match groups that don't exist.
+    with mock.patch.object(nginx_access_log_metrics, 'metrics') as m:
+      nginx_access_log_metrics.emit_successful_response_metric(match)
+      m.Counter.assert_called_with(nginx_access_log_metrics._METRIC_NAME)