blob: 4ef05c88bf969cd464245d9d51e0f8079e1b7f13 [file] [log] [blame]
Mike Frysingerf1ba7ad2022-09-12 05:42:57 -04001# Copyright 2014 The ChromiumOS Authors
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Mike Frysinger750c5f52014-09-16 16:16:57 -04005"""Script to discover dependencies and other file information from a build.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07006
Some files in the image are installed to provide specific functionality: for
example chrome, shill or bluetoothd each provide a different feature that may
or may not be present on a given build. Many other files are dependencies of
these files and need to be present in the image for them to work. These
dependencies come from needed shared libraries, executed files and other
configuration files read.
13
14This script currently discovers dependencies between ELF files for libraries
15required at load time (libraries loaded by the dynamic linker) but not
Alex Deymo365b10c2014-08-25 13:14:28 -070016libraries loaded at runtime with dlopen(). It also computes size and file type
17in several cases to help understand the contents of the built image.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070018"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
Chris McDonald59650c32021-07-20 15:29:28 -060021import logging
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070022import multiprocessing
23import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070024import stat
Mike Frysinger00688e12022-04-21 21:22:35 -040025from typing import Union
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026
Chris McDonald59650c32021-07-20 15:29:28 -060027from chromite.third_party import lddtree
28
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070029from chromite.lib import commandline
Alex Deymo365b10c2014-08-25 13:14:28 -070030from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070032from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033
34
# Patterns used to parse Gentoo atoms. Each one captures the "category/name"
# part of an ebuild name as group 1; the versioned pattern additionally
# captures the version (including an optional "-rN" revision) as group 2 and
# tolerates a leading "=".
#
# Ebuild names expected to match the pattern without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# Ebuild names expected to match the pattern with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
RE_EBUILD_WITH_VERSION = (
    r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$"
)
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070049
50
def ParseELFWithArgs(args):
    """Adapter that calls parseelf.ParseELF() with a packed argument sequence.

    multiprocessing.Pool.map hands every work item to the callable as one
    argument, so the ParseELF() arguments travel together in |args| and are
    unpacked here.

    Args:
        args: Sequence of positional arguments for parseelf.ParseELF(). The
            second element is the relative path of the file and is echoed
            back in the result.

    Returns:
        A 2-tuple (args[1], result of ParseELF()), or None when ParseELF()
        returned None.
    """
    parsed = parseelf.ParseELF(*args)
    return None if parsed is None else (args[1], parsed)
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070064
65
class DepTracker(object):
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the
    files in the root image. Init() must be called first to index the files;
    the Compute*() methods then annotate that index and SaveJSON() dumps it.
    """

    def __init__(self, root: Union[str, os.PathLike], jobs: int = 1):
        """Validates |root| and prepares the (optionally parallel) map helper.

        Args:
            root: Path to the directory holding the root image. It is
                lstat()ed, so a symlink to a directory is rejected.
            jobs: Number of worker processes used for the ELF parsing step.
                A process pool is only created when this is greater than 1.

        Raises:
            Exception: If |root| is not a directory.
            OSError: If |root| cannot be lstat()ed.
        """
        # Assigned before anything below can raise so that __del__ always
        # finds the attribute, even on a partially constructed instance.
        self._pool = None

        # TODO(vapier): Convert this to Path.
        root = str(root)
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise Exception(f"root ({root}) must be a directory")
        # Always keep exactly one trailing slash so relative paths can be
        # obtained by slicing off len(self._root) characters.
        self._root = root.rstrip("/") + "/"
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a pool
        # of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            # Pool is close()d in DepTracker's destructor.
            # pylint: disable=consider-using-with
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._imap = map

        # rel_path -> dict with "size" and, once computed, "deps", "ftype"
        # and "ebuild".
        self._files = {}
        # "category/pf" -> dict with "size", "files", "atom" and "version".
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
        # to point to the lowest lexicographically file with the same inode.
        self._symlinks = {}
        self._hardlinks = {}

    def __del__(self):
        """Destructor method to free up self._pool resource."""
        # The pool only exists when the tracker was built with jobs > 1, and
        # the attribute itself may be missing if __init__ never ran; closing
        # unconditionally would raise AttributeError during finalization.
        pool = getattr(self, "_pool", None)
        if pool is not None:
            pool.close()
            self._pool = None

    def Init(self):
        """Generates the initial list of files.

        Walks the root directory and records every non-directory entry that
        os.walk() reports in self._files with its size. Symlinks get a
        "symlink" dependency on their target and are remembered in
        self._symlinks. Every entry whose inode was already seen is
        remembered in self._hardlinks, pointing at the first path seen with
        that inode.
        """
        # Maps each inode already visited to the first rel_path found for it.
        # Directories and files are visited in sorted order so that "first"
        # is deterministic.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root) :]
                st = os.lstat(full_path)

                file_data = {
                    "size": st.st_size,
                }
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    # Absolute targets are made relative to the root by
                    # dropping the leading slashes; relative targets are
                    # resolved against the directory of the link.
                    # lddtree's normpath handles a little more cases than the
                    # os.path version. In particular, it handles the '//' case.
                    self._symlinks[rel_path] = (
                        link_path.lstrip("/")
                        if link_path and link_path[0] == "/"
                        else lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path), link_path)
                        )
                    )
                    file_data["deps"] = {"symlink": [self._symlinks[rel_path]]}

                # Track hardlinks.
                if st.st_ino in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[st.st_ino]
                    continue
                seen_inodes[st.st_ino] = rel_path

    def SaveJSON(self, filename):
        """Save the computed information to a JSON file.

        The output is a JSON object with two keys: "files" (the per-file
        records) and "ebuilds" (the per-ebuild records).

        Args:
            filename: The destination JSON file.
        """
        data = {
            "files": self._files,
            "ebuilds": self._ebuilds,
        }
        # Use a context manager so the file is flushed and closed
        # deterministically instead of whenever the handle gets collected.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f)

    def ComputeEbuildDeps(self, sysroot):
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file
        in the root, also records its total size, number of files, atom and
        version. Stores the information internally.

        When a file is claimed by more than one ebuild a warning is logged and
        the last ebuild processed wins.

        Args:
            sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning(
                "PortageDB directory not found: %s", portage_db.db_path
            )
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = f"{pkg.category}/{pkg.pf}"
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't copied
                # to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if "ebuild" in file_data:
                    logging.warning(
                        "Duplicated entry for %s: %s and %s",
                        rel_path,
                        file_data["ebuild"],
                        cpf,
                    )
                file_data["ebuild"] = cpf
                pkg_size += file_data["size"]
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                "size": pkg_size,
                "files": len(pkg_files),
                "atom": f"{pkg.category}/{pkg.package}",
                "version": pkg.version,
            }
        # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self):
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files and stored under the "deps" key of each file record, as a dict
        mapping a dependency type to a list of paths. The dependency type is
        one of the following strings stating how the dependency was
        discovered:
          'ldd': The dependent ELF file is listed as needed in the dynamic
              section.
          'symlink': The file is a symlink to the listed path (filled in by
              Init()).

        The "ldd" paths are stored exactly as reported by the ELF parser for
        each needed library.
        NOTE(review): presumably the path is None when a needed library was
        not found -- confirm against parseelf.

        As a side effect, the "ftype" of every parsed ELF file is filled in
        when it is not already set.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the files worth handing to the ELF parser: symlinks and
        # hardlink duplicates recorded by Init() are skipped, and so is
        # anything that is not a regular file.
        parseelf_args = []
        for rel_path in self._files:
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            full_path = os.path.join(self._root, rel_path)
            st = os.lstat(full_path)
            if not stat.S_ISREG(st.st_mode):
                continue
            parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive. Entries
        # that could not be parsed come back as None and are dropped.
        elfs = dict(
            x
            for x in self._imap(ParseELFWithArgs, parseelf_args)
            if x is not None
        )

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at this
            # point to avoid re-parsing the ELF file later.
            if "ftype" not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data["ftype"] = ftype

            file_deps = file_data.get("deps", {})
            # Dependencies based on the result of ldd.
            for lib in elf.get("needed", []):
                lib_path = elf["libs"][lib]["path"]
                file_deps.setdefault("ldd", []).append(lib_path)

            # Only attach "deps" when there is something to report.
            if file_deps:
                file_data["deps"] = file_deps

    def ComputeFileTypes(self):
        """Computes all the missing file type for the files in the root.

        Files that already have an "ftype" are left untouched, and nothing is
        stored when the decoder returns a false value.
        """
        for rel_path, file_data in self._files.items():
            if "ftype" in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data["ftype"] = ftype
Alex Deymo365b10c2014-08-25 13:14:28 -0700271
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700272
def ParseArgs(argv):
    """Parse the command line and return the frozen options.

    Args:
        argv: The command line arguments handed to the parser.

    Returns:
        The options object returned by the parser, after Freeze() was called
        on it.
    """
    # (flags, keyword arguments) for every option, in registration order.
    arg_specs = (
        (
            ("-j", "--jobs"),
            {
                "type": int,
                "default": multiprocessing.cpu_count(),
                "help": "number of simultaneous jobs.",
            },
        ),
        (
            ("--sysroot",),
            {
                "type": "path",
                "metavar": "SYSROOT",
                "help": "parse portage DB for ebuild information from the "
                "provided sysroot.",
            },
        ),
        (
            ("--json",),
            {
                "type": "path",
                "help": "store information in JSON file.",
            },
        ),
        (
            ("root",),
            {
                "type": "path",
                "help": "path to the directory where the rootfs is mounted.",
            },
        ),
    )

    parser = commandline.ArgumentParser()
    for flags, kwargs in arg_specs:
        parser.add_argument(*flags, **kwargs)

    parsed = parser.parse_args(argv)
    parsed.Freeze()
    return parsed
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700303
304
def main(argv):
    """Script entry point.

    Indexes the root directory given on the command line, computes the ELF
    dependencies and file types, optionally adds the ebuild information when
    a sysroot was given and optionally writes everything to a JSON file.

    Args:
        argv: The command line arguments, passed through to ParseArgs().
    """
    opts = ParseArgs(argv)
    logging.debug("Options are %s", opts)

    tracker = DepTracker(opts.root, jobs=opts.jobs)

    # These steps always run, in this order: the index has to exist before
    # the dependency and file type passes can annotate it.
    for step in (
        tracker.Init,
        tracker.ComputeELFFileDeps,
        tracker.ComputeFileTypes,
    ):
        step()

    sysroot = opts.sysroot
    if sysroot:
        tracker.ComputeEbuildDeps(sysroot)

    json_path = opts.json
    if json_path:
        tracker.SaveJSON(json_path)