scripts: gconv_strip: process gconv-modules.d
New glibc versions moved most gconv module information into a
gconv-modules-extra.conf file in gconv-modules.d [1]. gconv_strip
currently only looks at the gconv-modules file. As a result,
gconv_strip isn't actually stripping much right now.
This change updates gconv_strip to search for additional files in
gconv-modules.d and process them in the exact same way as it processes
the gconv-modules file.
[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=NEWS;h=f976abccbd6ffe3c2d25b6d22bc9e042ab394fab;hb=7f079fdc16e88ebb8020e17b2fd900e8924da29a#l775
BUG=b:277779682
TEST=build a tatl image; check /usr/lib64/gconv for removed modules
Change-Id: Idce5cf6d4898e41759989f757e719ae63bbdebdd
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/4679252
Tested-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Commit-Queue: Robert Kolchmeyer <rkolchmeyer@google.com>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Reviewed-by: Alex Klein <saklein@chromium.org>
diff --git a/scripts/gconv_strip.py b/scripts/gconv_strip.py
index 4f509f5..1878003 100644
--- a/scripts/gconv_strip.py
+++ b/scripts/gconv_strip.py
@@ -64,13 +64,15 @@
See the comments on gconv-modules file for syntax details.
"""
- def __init__(self, gconv_modules_file):
+ def __init__(self, gconv_modules_file, modules_dir):
"""Initialize the class.
Args:
gconv_modules_file: Path to gconv/gconv-modules file.
+ modules_dir: Path to the directory that contains the gconv modules.
"""
self._filename = gconv_modules_file
+ self._modules_dir = modules_dir
# An alias map of charsets. The key (fromcharset) is the alias name and
# the value (tocharset) is the real charset name. We also support a
@@ -150,8 +152,6 @@
- used_modules
)
- modules_dir = os.path.dirname(self._filename)
-
all_modules = set.union(used_modules, unused_modules)
# The list of charsets that depend on a given library. For example,
# libdeps['libCNS.so'] is the set of all the modules that require that
@@ -159,7 +159,9 @@
libdeps = {}
for module in all_modules:
deps = lddtree.ParseELF(
- os.path.join(modules_dir, "%s.so" % module), modules_dir, []
+ os.path.join(self._modules_dir, "%s.so" % module),
+ self._modules_dir,
+ [],
)
if "needed" not in deps:
continue
@@ -181,7 +183,7 @@
unused_size = 0
for module in sorted(unused_modules):
- module_path = os.path.join(modules_dir, "%s.so" % module)
+ module_path = os.path.join(self._modules_dir, "%s.so" % module)
unused_size += os.lstat(module_path).st_size
logging.debug("rm %s", module_path)
if not dryrun:
@@ -189,7 +191,7 @@
unused_libdeps_size = 0
for lib in sorted(unused_libdeps):
- lib_path = os.path.join(modules_dir, lib)
+ lib_path = os.path.join(self._modules_dir, lib)
unused_libdeps_size += os.lstat(lib_path).st_size
logging.debug("rm %s", lib_path)
if not dryrun:
@@ -288,8 +290,19 @@
"Searching for unused gconv files defined in %s", gconv_modules_file
)
- gmods = GconvModules(gconv_modules_file)
- charsets = gmods.Load()
+ # Additional gconv-modules configuration files can be present in the
+ # co-located gconv-modules.d. glibc installs a gconv-modules-extra.conf
+ # here by default.
+ modules_dir = os.path.dirname(gconv_modules_file)
+ extras = glob.glob(
+ os.path.join(
+ modules_dir,
+ os.path.basename(gconv_modules_file) + ".d",
+ "*.conf",
+ )
+ )
+ gmods_groups = [GconvModules(gconv_modules_file, modules_dir)]
+ gmods_groups.extend(GconvModules(x, modules_dir) for x in extras)
# Use scanelf to search for all the binary files on the rootfs that require
# or define the symbol iconv_open. We also include the binaries that define
@@ -312,46 +325,48 @@
files = set(result.stdout.splitlines())
logging.debug("Symbols %s found on %d files.", symbols, len(files))
- # The charsets are represented as nul-terminated strings in the binary
- # files, so we append the '\0' to each string. This prevents some false
- # positives when the name of the charset is a substring of some other
- # string. It doesn't prevent false positives when the charset name is the
- # suffix of another string, for example a binary with the string "DON'T DO
- # IT\0" will match the 'IT' charset. Empirical test on ChromeOS images
- # suggests that only 4 charsets could fall in category.
- strings = [s.encode("utf-8") + b"x\00" for s in charsets]
- logging.info(
- "Will search for %d strings in %d files", len(strings), len(files)
- )
-
- # Charsets listed in STICKY_MOUDLES are initialized as used. Note that those
- # strings should be listed in the gconv-modules file.
- unknown_sticky_modules = set(STICKY_MODULES) - set(charsets)
- if unknown_sticky_modules:
- logging.warning(
- "The following charsets were explicitly requested in "
- "STICKY_MODULES even though they don't exist: %s",
- ", ".join(unknown_sticky_modules),
- )
- global_used = [charset in STICKY_MODULES for charset in charsets]
-
- for filename in files:
- used_filenames = MultipleStringMatch(
- strings, osutils.ReadFile(filename, mode="rb")
+ for gmods in gmods_groups:
+ charsets = gmods.Load()
+ # The charsets are represented as nul-terminated strings in the binary
+ # files, so we append the '\0' to each string. This prevents some false
+ # positives when the name of the charset is a substring of some other
+ # string. It doesn't prevent false positives when the charset name is
+ # the suffix of another string, for example a binary with the string
+ # "DON'T DO IT\0" will match the 'IT' charset. Empirical test on
+ # ChromeOS images suggests that only 4 charsets could fall in category.
+ strings = [s.encode("utf-8") + b"x\00" for s in charsets]
+ logging.info(
+ "Will search for %d strings in %d files", len(strings), len(files)
)
- global_used = [
- operator.or_(*x) for x in zip(global_used, used_filenames)
- ]
- # Check the debug flag to avoid running a useless loop.
- if opts.debug and any(used_filenames):
- logging.debug("File %s:", filename)
- for i, used_filename in enumerate(used_filenames):
- if used_filename:
- logging.debug(" - %s", strings[i])
+ # Charsets listed in STICKY_MODULES are initialized as used. Note that
+ # those strings should be listed in the gconv-modules file.
+ unknown_sticky_modules = set(STICKY_MODULES) - set(charsets)
+ if unknown_sticky_modules:
+ logging.warning(
+ "The following charsets were explicitly requested in "
+ "STICKY_MODULES even though they don't exist: %s",
+ ", ".join(unknown_sticky_modules),
+ )
+ global_used = [charset in STICKY_MODULES for charset in charsets]
- used_charsets = [cs for cs, used in zip(charsets, global_used) if used]
- gmods.Rewrite(used_charsets, opts.dryrun)
+ for filename in files:
+ used_filenames = MultipleStringMatch(
+ strings, osutils.ReadFile(filename, mode="rb")
+ )
+
+ global_used = [
+ operator.or_(*x) for x in zip(global_used, used_filenames)
+ ]
+ # Check the debug flag to avoid running a useless loop.
+ if opts.debug and any(used_filenames):
+ logging.debug("File %s:", filename)
+ for i, used_filename in enumerate(used_filenames):
+ if used_filename:
+ logging.debug(" - %s", strings[i])
+
+ used_charsets = [cs for cs, used in zip(charsets, global_used) if used]
+ gmods.Rewrite(used_charsets, opts.dryrun)
return 0