blob: 9923b7a038d9e4286ebbf255491039d065fda4e6 [file] [log] [blame]
Mike Frysingerf1ba7ad2022-09-12 05:42:57 -04001# Copyright 2014 The ChromiumOS Authors
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Mike Frysinger750c5f52014-09-16 16:16:57 -04005"""Script to discover dependencies and other file information from a build.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07006
Some files in the image are installed to provide specific functionality: for
example chrome, shill and bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration files
read.
13
14This script currently discovers dependencies between ELF files for libraries
15required at load time (libraries loaded by the dynamic linker) but not
Alex Deymo365b10c2014-08-25 13:14:28 -070016libraries loaded at runtime with dlopen(). It also computes size and file type
17in several cases to help understand the contents of the built image.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070018"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
Chris McDonald59650c32021-07-20 15:29:28 -060021import logging
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070022import multiprocessing
23import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070024import stat
Mike Frysinger00688e12022-04-21 21:22:35 -040025from typing import Union
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026
Chris McDonald59650c32021-07-20 15:29:28 -060027from chromite.third_party import lddtree
28
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070029from chromite.lib import commandline
Alex Deymo365b10c2014-08-25 13:14:28 -070030from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070032from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033
34
# Regexes to parse Gentoo atoms, splitting the package name from the version.
# They are meant to match ebuild names such as the following.
#
# Without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
#
# With version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
RE_EBUILD_WITH_VERSION = (
    # Optional leading "=", then the "category/package" capture group.
    r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)"
    # A dash, then the version capture group with an optional "-rN" suffix.
    r"\-([^\-]+(\-r\d+)?)$"
)
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070049
50
def ParseELFWithArgs(args):
    """Call parseelf.ParseELF() with its arguments packed in one sequence.

    multiprocessing.Pool.map only hands a single argument to the mapped
    function, so the ParseELF() arguments are passed packed and expanded
    here.

    Args:
        args: The positional arguments for parseelf.ParseELF(); the second
            element is the relative path of the file being parsed.

    Returns:
        A 2-tuple (args[1], result of ParseELF()), or None when ParseELF()
        returns None.
    """
    parsed = parseelf.ParseELF(*args)
    return None if parsed is None else (args[1], parsed)
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070064
65
class DepTracker(object):
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the
    files in the root image.
    """

    def __init__(self, root: Union[str, os.PathLike], jobs: int = 1):
        """Initializes the tracker for the given root directory.

        Args:
            root: Path to the directory holding the files to inspect.
            jobs: Number of worker processes used to parse ELF files. A
                process pool is only created when this is greater than 1.

        Raises:
            Exception: If |root| is not a directory.
        """
        # Set this before anything can raise so that __del__ always finds the
        # attribute, even when no pool is ever created.
        self._pool = None

        # TODO(vapier): Convert this to Path.
        root = str(root)
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise Exception("root (%s) must be a directory" % root)
        self._root = root.rstrip("/") + "/"
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a
        # pool of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            # Pool is close()d in DepTracker's destructor.
            # pylint: disable=consider-using-with
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._imap = map

        # Per-file and per-ebuild information, keyed by relative path and by
        # "category/package-version" respectively.
        self._files = {}
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are
        # assumed to point to the lexicographically lowest file with the same
        # inode.
        self._symlinks = {}
        self._hardlinks = {}

    def __del__(self):
        """Destructor method to free up self._pool resource."""
        # There is no pool when jobs <= 1, and the attribute can be missing
        # altogether if __init__ did not run or failed early.
        pool = getattr(self, "_pool", None)
        if pool is not None:
            pool.close()

    def Init(self):
        """Generates the initial list of files.

        Walks the root directory recording the size of every file, the
        resolved target of every symlink and which files are hardlinks of a
        previously seen file.
        """
        # Maps the (device, inode) identity of a file to the first rel_path
        # seen for it. The device is part of the key because inode numbers
        # are only unique within one filesystem.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root) :]
                st = os.lstat(full_path)

                file_data = {
                    "size": st.st_size,
                }
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    if link_path.startswith("/"):
                        # Absolute targets are taken relative to the root.
                        target = link_path.lstrip("/")
                    else:
                        # lddtree's normpath handles a little more cases than
                        # the os.path version. In particular, it handles the
                        # '//' case.
                        target = lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path), link_path)
                        )
                    self._symlinks[rel_path] = target
                    file_data["deps"] = {"symlink": [target]}

                # Track hardlinks.
                file_id = (st.st_dev, st.st_ino)
                if file_id in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[file_id]
                    continue
                seen_inodes[file_id] = rel_path

    def SaveJSON(self, filename):
        """Save the computed information to a JSON file.

        Args:
            filename: The destination JSON file.
        """
        data = {
            "files": self._files,
            "ebuilds": self._ebuilds,
        }
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f)

    def ComputeEbuildDeps(self, sysroot):
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file
        in the root, the total size and number of files are stored
        internally.

        Args:
            sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning(
                "PortageDB directory not found: %s", portage_db.db_path
            )
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = "%s/%s" % (pkg.category, pkg.pf)
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't
                # copied to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if "ebuild" in file_data:
                    logging.warning(
                        "Duplicated entry for %s: %s and %s",
                        rel_path,
                        file_data["ebuild"],
                        cpf,
                    )
                # The last package listing the file wins.
                file_data["ebuild"] = cpf
                pkg_size += file_data["size"]
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                "size": pkg_size,
                "files": len(pkg_files),
                "atom": "%s/%s" % (pkg.category, pkg.package),
                "version": pkg.version,
            }
            # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self):
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files and stored in each file's entry under the "deps" key, a dict
        mapping the dependency type to the list of paths it depends on. The
        dependency type is one of the following strings stating how the
        dependency was discovered:
            'ldd': The dependent ELF file is listed as needed in the dynamic
                section.
            'symlink': The file is a symlink to the listed path.

        The 'ldd' paths are taken as-is from the parseelf.ParseELF() result;
        presumably an entry is None when the library wasn't found (confirm
        against parseelf/lddtree).

        The file type of every parsed ELF file is also filled in here to
        avoid parsing it again later.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the regular files that are neither symlinks nor hardlinks
        # of an already listed file; those are the only ones parsed as ELF.
        parseelf_args = []
        for rel_path in self._files:
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            full_path = os.path.join(self._root, rel_path)
            st = os.lstat(full_path)
            if not stat.S_ISREG(st.st_mode):
                continue
            parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive.
        # ParseELFWithArgs returns None for files that failed to parse.
        elfs = dict(
            x
            for x in self._imap(ParseELFWithArgs, parseelf_args)
            if x is not None
        )

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at
            # this point to avoid re-parsing the ELF file later.
            if "ftype" not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data["ftype"] = ftype

            file_deps = file_data.get("deps", {})
            # Dependencies based on the result of ldd.
            for lib in elf.get("needed", []):
                lib_path = elf["libs"][lib]["path"]
                file_deps.setdefault("ldd", []).append(lib_path)

            # Only store "deps" when there is at least one dependency.
            if file_deps:
                file_data["deps"] = file_deps

    def ComputeFileTypes(self):
        """Computes all the missing file type for the files in the root."""
        for rel_path, file_data in self._files.items():
            if "ftype" in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data["ftype"] = ftype
Alex Deymo365b10c2014-08-25 13:14:28 -0700272
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700273
def ParseArgs(argv):
    """Parse the command line in |argv| and return the frozen options."""
    arg_parser = commandline.ArgumentParser()

    # Optional flags.
    arg_parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=multiprocessing.cpu_count(),
        help="number of simultaneous jobs.",
    )
    arg_parser.add_argument(
        "--sysroot",
        type="path",
        metavar="SYSROOT",
        help="parse portage DB for ebuild information from the provided sysroot.",
    )
    arg_parser.add_argument(
        "--json",
        type="path",
        help="store information in JSON file.",
    )

    # Required positional argument.
    arg_parser.add_argument(
        "root",
        type="path",
        help="path to the directory where the rootfs is mounted.",
    )

    options = arg_parser.parse_args(argv)
    # Freeze the options before handing them back to the caller.
    options.Freeze()
    return options
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700304
305
def main(argv):
    """Script entry point.

    Scans the root given on the command line, computes ELF dependencies and
    file types, optionally adds ebuild information from a sysroot and
    optionally saves the result to a JSON file.

    Args:
        argv: The command line arguments to parse.
    """
    options = ParseArgs(argv)
    logging.debug("Options are %s", options)

    tracker = DepTracker(options.root, jobs=options.jobs)
    tracker.Init()

    # ELF parsing runs first; ComputeFileTypes() then fills in only the file
    # types that are still missing.
    tracker.ComputeELFFileDeps()
    tracker.ComputeFileTypes()

    # Ebuild information is only gathered when a sysroot was provided.
    sysroot = options.sysroot
    if sysroot:
        tracker.ComputeEbuildDeps(sysroot)

    # The results are only written out when a JSON path was provided.
    json_path = options.json
    if json_path:
        tracker.SaveJSON(json_path)