# Copyright 2014 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide specific functionality: for
example chrome, shill or bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration files
read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""

import json
import logging
import multiprocessing
import os
import stat
from typing import Union

from chromite.third_party import lddtree

from chromite.lib import commandline
from chromite.lib import filetype
from chromite.lib import parseelf
from chromite.lib import portage_util


# Regexes to parse Gentoo atoms. They should match the following ebuild names,
# splitting the package name from the version.
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
# In RE_EBUILD_WITH_VERSION group 1 is the category/package name and group 2 is
# the version including the optional "-rN" revision suffix; a leading "=" is
# accepted and discarded.
RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
RE_EBUILD_WITH_VERSION = (
    r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$"
)


def ParseELFWithArgs(args):
    """Wrapper to parseelf.ParseELF accepting a single arg.

    This wrapper is required to use the multiprocessing.Pool.map function,
    which hands each work item to the callable as one positional argument.

    Args:
        args: Sequence of positional arguments forwarded to
            parseelf.ParseELF(). The element at index 1 is returned as the
            key of the result; DepTracker passes the file's path relative to
            the root there.

    Returns:
        A 2-tuple with the passed relative path and the result of ParseELF().
        On error, when ParseELF() returns None, this function returns None.
    """
    elf = parseelf.ParseELF(*args)
    if elf is None:
        return None
    return args[1], elf


class DepTracker:
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the files
    in the root image. Call Init() first to enumerate the files; the Compute*()
    methods then annotate the per-file records, and SaveJSON() dumps them.
    """

    def __init__(self, root: Union[str, os.PathLike], jobs: int = 1):
        """Prepare a tracker for the tree rooted at |root|.

        Args:
            root: Directory holding the image contents. It must itself be a
                directory (it is checked with lstat, so a symlink to a
                directory is rejected).
            jobs: Number of worker processes used to parse ELF files. Any
                value of 1 or less runs everything in the calling process.

        Raises:
            Exception: If |root| is not a directory.
        """
        # Assigned before anything below can raise so that __del__ always
        # finds the attribute, even on a half-constructed object.
        self._pool = None

        # TODO(vapier): Convert this to Path.
        root = str(root)
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise Exception(f"root ({root}) must be a directory")
        # Normalized to end in exactly one "/" so relative paths can be
        # obtained by slicing off len(self._root) characters.
        self._root = root.rstrip("/") + "/"
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a pool
        # of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            # Pool is close()d in DepTracker's destructor.
            # pylint: disable=consider-using-with
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._imap = map

        # rel_path -> per-file record ("size", and later "deps", "ftype",
        # "ebuild").
        self._files = {}
        # "category/pf" -> per-ebuild summary record.
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. A hardlink maps to
        # the first path with the same inode met during the sorted walk done
        # by Init().
        self._symlinks = {}
        self._hardlinks = {}

    def __del__(self):
        """Destructor method to free up self._pool resource."""
        # No pool exists when jobs <= 1 or when __init__ raised early; getattr
        # also covers instances created without running __init__ at all.
        pool = getattr(self, "_pool", None)
        if pool is not None:
            pool.close()
            self._pool = None

    def Init(self) -> None:
        """Generates the initial list of files.

        Walks the root and records, for every non-directory entry, its size
        plus whether it is a symlink or a hardlink to an already seen file.
        """
        # NOTE: os.walk() reports symlinks that resolve to directories in its
        # directory list, not in filenames, so such links are not recorded.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root) :]
                st = os.lstat(full_path)

                file_data = {"size": st.st_size}
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    if link_path.startswith("/"):
                        # Absolute targets are taken relative to the root.
                        target = link_path.lstrip("/")
                    else:
                        # lddtree's normpath handles a little more cases than
                        # the os.path version. In particular, it handles the
                        # '//' case.
                        target = lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path), link_path)
                        )
                    self._symlinks[rel_path] = target
                    file_data["deps"] = {"symlink": [target]}

                # Track hardlinks. Inode numbers are only unique per device,
                # so the device id is part of the key to avoid pairing
                # unrelated files when the root spans several filesystems.
                inode_key = (st.st_dev, st.st_ino)
                if inode_key in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[inode_key]
                else:
                    seen_inodes[inode_key] = rel_path

    def SaveJSON(self, filename) -> None:
        """Save the computed information to a JSON file.

        Args:
            filename: The destination JSON file.
        """
        data = {
            "files": self._files,
            "ebuilds": self._ebuilds,
        }
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f)

    def ComputeEbuildDeps(self, sysroot) -> None:
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file
        in the root, a summary (total size, file count, atom and version) is
        stored internally. Dependencies between ebuilds are not computed yet.

        Args:
            sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning(
                "PortageDB directory not found: %s", portage_db.db_path
            )
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = f"{pkg.category}/{pkg.pf}"
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't copied
                # to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if "ebuild" in file_data:
                    logging.warning(
                        "Duplicated entry for %s: %s and %s",
                        rel_path,
                        file_data["ebuild"],
                        cpf,
                    )
                # On duplicates the last package listed wins, but the size is
                # still counted for every package claiming the file.
                file_data["ebuild"] = cpf
                pkg_size += file_data["size"]
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                "size": pkg_size,
                "files": len(pkg_files),
                "atom": f"{pkg.category}/{pkg.package}",
                "version": pkg.version,
            }
            # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self) -> None:
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files and stored in each file's record under the "deps" key, a dict
        mapping a dependency type to the list of paths found that way:
            'ldd': Paths of the libraries listed as needed in the dynamic
                section of the ELF file.
            'symlink': The target of the symlink (filled in by Init()).
        The "ldd" paths are whatever the ELF parser reports for each needed
        library; presumably that is None when the library wasn't found, so
        consumers should be ready for None entries.

        The file type of every parsed ELF file is also recorded here, if not
        set yet, to avoid re-parsing the ELF file later.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the regular files to hand to the ELF parser. Symlinks and
        # hardlink aliases are skipped, as are other non-regular files.
        parseelf_args = []
        for rel_path in self._files:
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            st = os.lstat(os.path.join(self._root, rel_path))
            if stat.S_ISREG(st.st_mode):
                parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive. Files
        # that failed to parse come back as None and are dropped.
        elfs = dict(
            result
            for result in self._imap(ParseELFWithArgs, parseelf_args)
            if result is not None
        )

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at this
            # point to avoid re-parsing the ELF file later.
            if "ftype" not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data["ftype"] = ftype

            file_deps = file_data.get("deps", {})
            # Dependencies based on the result of ldd.
            for lib in elf.get("needed", []):
                file_deps.setdefault("ldd", []).append(
                    elf["libs"][lib]["path"]
                )

            # Only attach "deps" when there is something to report.
            if file_deps:
                file_data["deps"] = file_deps

    def ComputeFileTypes(self) -> None:
        """Computes all the missing file type for the files in the root."""
        for rel_path, file_data in self._files.items():
            if "ftype" in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data["ftype"] = ftype


def ParseArgs(argv):
    """Return parsed commandline arguments.

    Args:
        argv: The list of command line arguments, without the program name.

    Returns:
        The frozen options object produced by the parser, with the attributes
        jobs, sysroot, json and root.
    """
    parser = commandline.ArgumentParser()
    parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=multiprocessing.cpu_count(),
        help="number of simultaneous jobs.",
    )
    parser.add_argument(
        "--sysroot",
        type="path",
        metavar="SYSROOT",
        help="parse portage DB for ebuild information from the provided "
        "sysroot.",
    )
    parser.add_argument(
        "--json", type="path", help="store information in JSON file."
    )

    parser.add_argument(
        "root",
        type="path",
        help="path to the directory where the rootfs is mounted.",
    )

    opts = parser.parse_args(argv)
    # Freeze so later code cannot accidentally modify the parsed options.
    opts.Freeze()
    return opts


def main(argv):
    """Main function to start the script.

    Enumerates the files under the given root, computes ELF dependencies and
    file types, optionally adds ebuild information from a sysroot, and
    optionally writes everything to a JSON file.

    Args:
        argv: The list of command line arguments, without the program name.
    """
    opts = ParseArgs(argv)
    logging.debug("Options are %s", opts)

    dt = DepTracker(opts.root, jobs=opts.jobs)
    dt.Init()

    # ELF parsing runs first: it records the file type of every ELF file, so
    # ComputeFileTypes() only has to look at the remaining files.
    dt.ComputeELFFileDeps()
    dt.ComputeFileTypes()

    if opts.sysroot:
        dt.ComputeEbuildDeps(opts.sysroot)

    if opts.json:
        dt.SaveJSON(opts.json)