pkg_size: create a simple CLI tool to dump a system size report
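pkg_size dumps the sizes of the packages installed in a built root as a
JSON report. It also emits gauge metrics, one per package (package size)
and one per invocation (total root size), to the build API's append-only
metrics log via append_metrics_log. This data will be used for build
metrics trend reporting.
To carry the gauge value, MetricEvent's "key" field is renamed to "arg":
timers keep using it as their disambiguating key, and gauges use it as
their value.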
BUG=chromium:1000449
TEST=cros_sdk -- '$HOME/trunk/chromite/run_tests'
Cq-Depend: chromium:1834120
Change-Id: I735057f9bcfe4de367a8df888e76a09836e371d1
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/1801037
Tested-by: Will Bradley <wbbradley@chromium.org>
Commit-Queue: Will Bradley <wbbradley@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Alex Klein <saklein@chromium.org>
diff --git a/api/metrics.py b/api/metrics.py
index cdf438e..1dba763 100644
--- a/api/metrics.py
+++ b/api/metrics.py
@@ -38,14 +38,14 @@
# Reduce over the input events to append output_events.
for input_event in metrics.read_metrics_events():
if input_event.op == metrics.OP_START_TIMER:
- timers[input_event.key] = (input_event.name,
+ timers[input_event.arg] = (input_event.name,
input_event.timestamp_epoch_millis)
elif input_event.op == metrics.OP_STOP_TIMER:
# TODO(wbbradley): Drop the None fallback https://crbug.com/1001909.
- timer = timers.pop(input_event.key, None)
+ timer = timers.pop(input_event.arg, None)
if timer is None:
logging.error('%s: stop timer recorded, but missing start timer!?',
- input_event.key)
+ input_event.arg)
if timer:
assert input_event.name == timer[0]
output_event = output_events.add()
@@ -57,6 +57,11 @@
output_event = output_events.add()
output_event.name = make_name(input_event.name)
output_event.timestamp_milliseconds = input_event.timestamp_epoch_millis
+ elif input_event.op == metrics.OP_GAUGE:
+ output_event = output_events.add()
+ output_event.name = make_name(input_event.name)
+ output_event.timestamp_milliseconds = input_event.timestamp_epoch_millis
+ output_event.gauge = input_event.arg
else:
raise ValueError('unexpected op "%s" found in metric event: %s' % (
input_event.op, input_event))
diff --git a/api/metrics_unittest.py b/api/metrics_unittest.py
index f900f89..a4b47d1 100644
--- a/api/metrics_unittest.py
+++ b/api/metrics_unittest.py
@@ -12,8 +12,8 @@
from chromite.api import metrics
from chromite.api.gen.chromite.api import build_api_test_pb2
from chromite.lib import cros_test_lib
-from chromite.utils.metrics import (MetricEvent, OP_NAMED_EVENT, OP_START_TIMER,
- OP_STOP_TIMER)
+from chromite.utils.metrics import (MetricEvent, OP_GAUGE, OP_NAMED_EVENT,
+ OP_START_TIMER, OP_STOP_TIMER)
class MetricsTest(cros_test_lib.TestCase):
@@ -23,8 +23,8 @@
"""Test timer math and deserialization into proto objects."""
response = build_api_test_pb2.TestResultMessage()
mock_events = [
- MetricEvent(600, 'a.b', OP_START_TIMER, key='100'),
- MetricEvent(1000, 'a.b', OP_STOP_TIMER, key='100'),
+ MetricEvent(600, 'a.b', OP_START_TIMER, arg='100'),
+ MetricEvent(1000, 'a.b', OP_STOP_TIMER, arg='100'),
]
with mock.patch('chromite.api.metrics.metrics.read_metrics_events',
return_value=mock_events):
@@ -41,7 +41,7 @@
"""
response = build_api_test_pb2.TestResultMessage()
mock_events = [
- MetricEvent(1000, 'a.named_event', OP_NAMED_EVENT, key=None),
+ MetricEvent(1000, 'a.named_event', OP_NAMED_EVENT, arg=None),
]
with mock.patch('chromite.api.metrics.metrics.read_metrics_events',
return_value=mock_events):
@@ -50,3 +50,17 @@
self.assertEqual(response.events[0].name, 'prefix.a.named_event')
self.assertEqual(response.events[0].timestamp_milliseconds, 1000)
self.assertFalse(response.events[0].duration_milliseconds)
+
+ def testDeserializeGauge(self):
+ """Test deserialization of a gauge."""
+ response = build_api_test_pb2.TestResultMessage()
+ mock_events = [
+ MetricEvent(1000, 'a.gauge', OP_GAUGE, arg=17),
+ ]
+ with mock.patch('chromite.api.metrics.metrics.read_metrics_events',
+ return_value=mock_events):
+ metrics.deserialize_metrics_log(response.events)
+ self.assertEqual(len(response.events), 1)
+ self.assertEqual(response.events[0].name, 'a.gauge')
+ self.assertEqual(response.events[0].timestamp_milliseconds, 1000)
+ self.assertEqual(response.events[0].gauge, 17)
diff --git a/bin/README.md b/bin/README.md
new file mode 100644
index 0000000..7274fb8
--- /dev/null
+++ b/bin/README.md
@@ -0,0 +1,4 @@
+# chromite/bin
+
+This subdirectory is in the $PATH for the SDK. We should only put things in here
+that we expect developers or tools to need.
diff --git a/lib/portage_util.py b/lib/portage_util.py
index ec9f4bb..da89235 100644
--- a/lib/portage_util.py
+++ b/lib/portage_util.py
@@ -2258,3 +2258,51 @@
"""
result = _Portageq(['match', '/', atom], board=board)
return SplitCPV(result.output.strip()) if result.output else None
+
+
+class PackageNotFoundError(Error):
+ """Error indicating that the package asked for was not found."""
+
+
+def GenerateInstalledPackages(db, root, packages):
+ """Generate a sequence of installed package objects from package names."""
+ for package in packages:
+ category, pv = package.split('/')
+ installed_package = db.GetInstalledPackage(category, pv)
+ if not installed_package:
+ raise PackageNotFoundError('Unable to locate package %s in %s' % (package,
+ root))
+ yield installed_package
+
+
+def GeneratePackageSizes(db, root, installed_packages):
+ """Collect package sizes and generate package size pairs.
+
+ Yields:
+ (str, int): A pair of cpv and total package size.
+ """
+ visited_cpvs = set()
+ for installed_package in installed_packages:
+ package_cpv = '%s/%s' % (installed_package.category, installed_package.pf)
+
+ assert package_cpv not in visited_cpvs
+ visited_cpvs.add(package_cpv)
+
+ total_package_filesize = 0
+ if not installed_package:
+ raise PackageNotFoundError('Unable to locate installed_package %s in %s' %
+ (package_cpv, root))
+ for content_type, path in installed_package.ListContents():
+ if content_type == InstalledPackage.OBJ:
+ filename = os.path.join(db.root, path)
+ try:
+ filesize = os.path.getsize(filename)
+ except OSError as e:
+ logging.warn('unable to compute the size of %s (skipping): %s',
+ filename, e)
+ continue
+ logging.debug('size of %s = %d', filename, filesize)
+ total_package_filesize += filesize
+ logging.debug('%s installed_package size is %d', package_cpv,
+ total_package_filesize)
+ yield (package_cpv, total_package_filesize)
diff --git a/lib/portage_util_unittest.py b/lib/portage_util_unittest.py
index 8a25c6d..cd7bb04 100644
--- a/lib/portage_util_unittest.py
+++ b/lib/portage_util_unittest.py
@@ -1321,6 +1321,25 @@
self.fake_packages.sort()
self.assertEqual(self.fake_packages, packages)
+ def testGeneratePackageSizes(self):
+ """Test if calculating installed package sizes works."""
+ fake_data = 'FAKE DATA'
+ expected_size = 0
+ for fake_file in self.fake_files:
+ if fake_file[0] == 'obj':
+ fake_filename = os.path.join(self.fake_chroot,
+ os.path.relpath(fake_file[1], '/'))
+ osutils.WriteFile(fake_filename, fake_data, makedirs=True)
+ expected_size += len(fake_data)
+
+ portage_db = portage_util.PortageDB(self.fake_chroot)
+ installed_packages = portage_db.InstalledPackages()
+ package_size_pairs = portage_util.GeneratePackageSizes(portage_db,
+ 'fake_chroot',
+ installed_packages)
+ total_size = sum(x for _, x in package_size_pairs)
+ self.assertEqual(total_size, expected_size)
+
def testIsPackageInstalled(self):
"""Test if checking the existence of an installed package works."""
self.assertTrue(portage_util.IsPackageInstalled(
diff --git a/scripts/emit_metric.py b/scripts/emit_metric.py
index 9e58dba..928365c 100644
--- a/scripts/emit_metric.py
+++ b/scripts/emit_metric.py
@@ -14,16 +14,19 @@
def main(argv):
"""Emit a metric event."""
parser = commandline.ArgumentParser(description=__doc__)
- parser.add_argument('op', choices=metrics.VALID_OPS,
+ parser.add_argument('op', choices=sorted(metrics.VALID_OPS),
help='Which metric event operator to emit.')
parser.add_argument('name',
help='The name of the metric event as you would like it '
'to appear downstream in data stores.')
- parser.add_argument('key', nargs='?',
- help='A unique key for this invocation to ensure that '
- 'start and stop timers can be matched.')
+ parser.add_argument('arg', nargs='?',
+ help='An accessory argument dependent upon the "op".')
opts = parser.parse_args(argv)
+ if opts.arg and not metrics.OP_EXPECTS_ARG[opts.op]:
+ # We do not expect to get an |arg| for this |op|.
+ parser.error('Unexpected arg "%s" given for op "%s"' % (opts.arg,
+ opts.op))
+
timestamp = metrics.current_milli_time()
- key = opts.key or opts.name
- metrics.append_metrics_log(timestamp, opts.name, opts.op, key=key)
+ metrics.append_metrics_log(timestamp, opts.name, opts.op, arg=opts.arg)
diff --git a/scripts/pkg_size b/scripts/pkg_size
new file mode 120000
index 0000000..b7045c5
--- /dev/null
+++ b/scripts/pkg_size
@@ -0,0 +1 @@
+wrapper.py
\ No newline at end of file
diff --git a/scripts/pkg_size.py b/scripts/pkg_size.py
new file mode 100644
index 0000000..1a7e845
--- /dev/null
+++ b/scripts/pkg_size.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+# Copyright 2019 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""The Package Size Reporting CLI entry point."""
+
+from __future__ import print_function
+
+import json
+
+from chromite.lib import commandline
+from chromite.lib import portage_util
+from chromite.utils import metrics
+
+
+def _get_parser():
+ """Create an argument parser for this script."""
+ parser = commandline.ArgumentParser(description=__doc__)
+ parser.add_argument('--root', required=True, type='path',
+ help='Specify the rootfs to investigate.')
+ parser.add_argument('--image-type',
+ help='Specify the type of image being investigated. '
+ 'e.g. [base, dev, test]')
+ parser.add_argument('--partition-name',
+ help='Specify the partition name. '
+ 'e.g. [rootfs, stateful]')
+ parser.add_argument('packages', nargs='*',
+ help='Names of packages to investigate. Must be '
+ 'specified as category/package-version.')
+ return parser
+
+
+def generate_package_size_report(db, root, image_type, partition_name,
+ installed_packages):
+ """Collect package sizes and generate a report."""
+ results = {}
+ total_size = 0
+ package_sizes = portage_util.GeneratePackageSizes(db, root,
+ installed_packages)
+ timestamp = metrics.current_milli_time()
+ for package_cpv, size in package_sizes:
+ results[package_cpv] = size
+ metrics.append_metrics_log(timestamp,
+ 'package_size.%s.%s.%s' % (image_type,
+ partition_name,
+ package_cpv),
+ metrics.OP_GAUGE,
+ arg=size)
+ total_size += size
+
+ metrics.append_metrics_log(timestamp,
+ 'total_size.%s.%s' % (image_type, partition_name),
+ metrics.OP_GAUGE,
+ arg=total_size)
+ return {'root': root, 'package_sizes': results, 'total_size': total_size}
+
+
+def main(argv):
+ """Find and report approximate size info for a particular built package."""
+ commandline.RunInsideChroot()
+
+ parser = _get_parser()
+ opts = parser.parse_args(argv)
+ opts.Freeze()
+
+ db = portage_util.PortageDB(root=opts.root)
+
+ if opts.packages:
+ installed_packages = portage_util.GenerateInstalledPackages(db, opts.root,
+ opts.packages)
+ else:
+ installed_packages = db.InstalledPackages()
+
+ results = generate_package_size_report(db, opts.root, opts.image_type,
+ opts.partition_name,
+ installed_packages)
+ print(json.dumps(results))
diff --git a/utils/metrics.py b/utils/metrics.py
index a070dff..e80f36c 100644
--- a/utils/metrics.py
+++ b/utils/metrics.py
@@ -26,13 +26,22 @@
OP_START_TIMER = 'start-timer'
OP_STOP_TIMER = 'stop-timer'
+OP_GAUGE = 'gauge'
OP_NAMED_EVENT = 'event'
-VALID_OPS = (OP_START_TIMER, OP_STOP_TIMER, OP_NAMED_EVENT)
+OP_EXPECTS_ARG = {
+ OP_START_TIMER: True,
+ OP_STOP_TIMER: True,
+ OP_NAMED_EVENT: False,
+ OP_GAUGE: True,
+}
+VALID_OPS = set(OP_EXPECTS_ARG)
-# MetricEvent store a start or a stop to a timer. Timers are keyed
-# with a unique value to make matching the bookends easier.
+# MetricEvent stores one of a few different types of metric events. The 'arg'
+# parameter is an overloaded value which is discriminated by the 'op' parameter.
+# Timers utilize 'arg' as a key value for disambiguation, and gauges use the arg
+# as their gauge value.
MetricEvent = collections.namedtuple('MetricEvent', ('timestamp_epoch_millis',
- 'name', 'op', 'key'))
+ 'name', 'op', 'arg'))
class Error(Exception):
@@ -70,14 +79,14 @@
(len(terms), terms))
assert terms[2] in {OP_START_TIMER, OP_STOP_TIMER}
- return MetricEvent(int(terms[0]), terms[1], terms[2], terms[3])
+ return MetricEvent(int(terms[0]), terms[1], terms[2], arg=terms[3])
def parse_named_event(terms):
"""Parse a named event line.
Args:
- terms: A list of the subdimensions of the MetricEvent type, omitting "key".
+ terms: A list of the subdimensions of the MetricEvent type, omitting "arg".
Returns:
A MetricEvent from the content of the terms.
@@ -91,7 +100,29 @@
(len(terms), terms))
assert terms[2] == OP_NAMED_EVENT
- return MetricEvent(int(terms[0]), terms[1], terms[2], key=None)
+ return MetricEvent(int(terms[0]), terms[1], terms[2], arg=None)
+
+
+def parse_gauge(terms):
+ """Parse a gauge, which is an event with an associated integer value.
+
+ Args:
+ terms: A list of the subdimensions of the MetricEvent type, leveraging |arg|
+ as a container for the actual gauge value.
+
+ Returns:
+ A MetricEvent from the content of the terms.
+
+ Raises:
+ ParseMetricError: An error occurred parsing the data from the list of terms.
+ """
+ if len(terms) != 4:
+ raise ParseMetricError('Incorrect number of terms for gauge. Should '
+ 'have been 4, instead it is %d. See terms %s.' %
+ (len(terms), terms))
+
+ assert terms[2] == OP_GAUGE
+ return MetricEvent(int(terms[0]), terms[1], terms[2], arg=int(terms[3]))
def get_metric_parser(op):
@@ -100,6 +131,7 @@
OP_START_TIMER: parse_timer,
OP_STOP_TIMER: parse_timer,
OP_NAMED_EVENT: parse_named_event,
+ OP_GAUGE: parse_gauge,
}[op]
@@ -153,7 +185,7 @@
return wrapper
-def append_metrics_log(timestamp, name, op, key=None):
+def append_metrics_log(timestamp, name, op, arg=None):
"""Handle appending a list of terms to the metrics log.
If the environment does not specify a metrics log, then skip silently.
@@ -162,12 +194,12 @@
timestamp: A millisecond epoch timestamp.
name: A period-separated string describing the event.
op: One of the OP_* values, determining which type of event this is.
- key: An optional key to disambiguate equivalenty named events.
+ arg: An accessory value for use based on the related |op|.
"""
metrics_log = os.environ.get(UTILS_METRICS_LOG_ENVVAR)
terms = [timestamp, name.replace('|', '_'), op]
- if key:
- terms.append(key)
+ if arg:
+ terms.append(arg)
# Format the actual line to log.
line = '|'.join(str(x) for x in terms)
@@ -187,14 +219,14 @@
Yields:
Context for context manager surrounding event emission.
"""
- # Timer events use a "key" to disambiguate in case of multiple concurrent or
+ # Timer events use |arg| as a key to disambiguate multiple concurrent or
# overlapping timers with the same name.
key = uuid.uuid4()
try:
- append_metrics_log(current_milli_time(), name, OP_START_TIMER, key=key)
+ append_metrics_log(current_milli_time(), name, OP_START_TIMER, arg=key)
yield
finally:
- append_metrics_log(current_milli_time(), name, OP_STOP_TIMER, key=key)
+ append_metrics_log(current_milli_time(), name, OP_STOP_TIMER, arg=key)
def event(name):