metrics: Add function to extract metrics from HTTP requests
Bug: 897394
Change-Id: I92d2514e4347f02dbcf2884bef3a073095ee03a3
Reviewed-on: https://chromium-review.googlesource.com/c/1292242
Commit-Queue: Edward Lesmes <ehmaldonado@chromium.org>
Reviewed-by: Andrii Shyshkalov <tandrii@chromium.org>
diff --git a/metrics_utils.py b/metrics_utils.py
index 9bdae9e..7cff588 100644
--- a/metrics_utils.py
+++ b/metrics_utils.py
@@ -5,9 +5,11 @@
from __future__ import print_function
+import re
import scm
import subprocess2
import sys
+import urlparse
from third_party import colorama
@@ -68,6 +70,64 @@
'https://webrtc.googlesource.com/src',
}
# Hosts that extract_http_metrics() is allowed to report. A request whose
# netloc is not in this set is recorded without a 'host' entry.
KNOWN_HTTP_HOSTS = {
  # Per-project review hosts.
  'chrome-internal-review.googlesource.com',
  'chromium-review.googlesource.com',
  'dart-review.googlesource.com',
  'pdfium-review.googlesource.com',
  'skia-review.googlesource.com',
  'webrtc-review.googlesource.com',
  # Hosts carrying a '*-mirror-chromium-review' name.
  'eu1-mirror-chromium-review.googlesource.com',
  'us1-mirror-chromium-review.googlesource.com',
  'us2-mirror-chromium-review.googlesource.com',
  'us3-mirror-chromium-review.googlesource.com',
}
+
# HTTP methods that extract_http_metrics() is allowed to report. Any other
# method is left out of the collected metrics.
KNOWN_HTTP_METHODS = {'DELETE', 'GET', 'PATCH', 'POST', 'PUT'}
+
# Maps the path name reported in metrics to the compiled regex recognising it.
# Every pattern accepts an optional '/a' prefix. extract_http_metrics() applies
# the patterns with re.match(), so they are anchored at the start of the URL
# path only (unless the pattern itself ends in '$').
KNOWN_HTTP_PATHS = {
  name: re.compile(pattern)
  for name, pattern in (
    ('accounts', r'(/a)?/accounts/.*'),
    ('changes', r'(/a)?/changes/([^/]+)?$'),
    ('changes/abandon', r'(/a)?/changes/.*/abandon'),
    ('changes/comments', r'(/a)?/changes/.*/comments'),
    ('changes/detail', r'(/a)?/changes/.*/detail'),
    ('changes/edit', r'(/a)?/changes/.*/edit'),
    ('changes/message', r'(/a)?/changes/.*/message'),
    ('changes/restore', r'(/a)?/changes/.*/restore'),
    ('changes/reviewers', r'(/a)?/changes/.*/reviewers/.*'),
    ('changes/revisions/commit', r'(/a)?/changes/.*/revisions/.*/commit'),
    ('changes/revisions/review', r'(/a)?/changes/.*/revisions/.*/review'),
    ('changes/submit', r'(/a)?/changes/.*/submit'),
    ('projects/branches', r'(/a)?/projects/.*/branches/.*'),
  )
}
+
# Values of the 'o' query parameter that extract_http_metrics() is allowed to
# report; any other value is dropped from the collected arguments.
KNOWN_HTTP_ARGS = {
  'ALL_REVISIONS', 'CURRENT_COMMIT', 'CURRENT_REVISION',
  'DETAILED_ACCOUNTS', 'LABELS',
}
+
def get_python_version():
"""Return the python version in the major.minor.micro format."""
@@ -92,6 +152,54 @@
return int(duration) >> 19
def extract_http_metrics(request_uri, method, status, response_time):
  """Extract metrics from an HTTP request.

  Extracts the host, path, and arguments from the request URI, and returns them
  along with the method, status and response time.

  The host, method, path and arguments are only reported when they are listed
  in the KNOWN_HTTP_* constants; anything else is left out of the result.

  Arguments are the values of the o= url parameter. In Gerrit, additional fields
  can be obtained by adding o parameters, each option requires more database
  lookups and slows down the query response time to the client, so we make an
  effort to collect them.

  The regexes defined in KNOWN_HTTP_PATHS are checked against the URL path, and
  the name of the first one that matches is reported.

  Args:
    request_uri: The URI of the request, as a string.
    method: The HTTP method of the request.
    status: The status of the response; stored unchanged.
    response_time: The response time of the request; stored unchanged.

  Returns:
    A dict that always has the keys 'status' and 'response_time', and has the
    keys 'method', 'host', 'path' and 'arguments' only when the corresponding
    value was recognized. 'arguments' is a non-empty list that keeps the order
    (and any repetition) of the recognized o= values in the query string.
  """
  http_metrics = {
    'status': status,
    'response_time': response_time,
  }

  if method in KNOWN_HTTP_METHODS:
    http_metrics['method'] = method

  parsed_url = urlparse.urlparse(request_uri)

  if parsed_url.netloc in KNOWN_HTTP_HOSTS:
    http_metrics['host'] = parsed_url.netloc

  # items() instead of iteritems(): the latter exists only on Python 2, while
  # items() works on both Python 2 and 3.
  for name, path_re in KNOWN_HTTP_PATHS.items():
    if path_re.match(parsed_url.path):
      http_metrics['path'] = name
      # Only a single path name is reported per request.
      break

  parsed_query = urlparse.parse_qs(parsed_url.query)

  # Collect o-parameters from the request.
  args = [
    arg for arg in parsed_query.get('o', [])
    if arg in KNOWN_HTTP_ARGS
  ]
  if args:
    http_metrics['arguments'] = args

  return http_metrics
+
+
def get_repo_timestamp(path_to_repo):
"""Get an approximate timestamp for the upstream of |path_to_repo|.