blob: ced8cb837525b694afe4ce5c627c00e631cea754 [file] [log] [blame]
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07001# Copyright 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Mike Frysinger750c5f52014-09-16 16:16:57 -04005"""Script to discover dependencies and other file information from a build.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07006
Some files in the image are installed to provide a particular feature: for
example chrome, shill or bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and configuration files that
are read.
13
14This script currently discovers dependencies between ELF files for libraries
15required at load time (libraries loaded by the dynamic linker) but not
Alex Deymo365b10c2014-08-25 13:14:28 -070016libraries loaded at runtime with dlopen(). It also computes size and file type
17in several cases to help understand the contents of the built image.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070018"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
Chris McDonald59650c32021-07-20 15:29:28 -060021import logging
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070022import multiprocessing
23import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070024import stat
Mike Frysinger00688e12022-04-21 21:22:35 -040025from typing import Union
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026
Chris McDonald59650c32021-07-20 15:29:28 -060027from chromite.third_party import lddtree
28
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070029from chromite.lib import commandline
Alex Deymo365b10c2014-08-25 13:14:28 -070030from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070032from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033
34
# Patterns for Gentoo atoms: "<category>/<package>", optionally followed by
# "-<version>[-r<revision>]". The first capture group is always the
# category/package part; RE_EBUILD_WITH_VERSION captures the version
# (including any revision suffix) in its second group.
#
# Names the patterns are meant to handle.
#   Without version:
#     chromeos-base/tty
#     chromeos-base/libchrome-271506
#     sys-kernel/chromeos-kernel-3_8
#   With version:
#     chromeos-base/tty-0.0.1-r4
#     chromeos-base/libchrome-271506-r5
#     sys-kernel/chromeos-kernel-3_8-3.8.11-r35
#
# NOTE(review): the package group is greedy and also accepts '-' and digits,
# so for "chromeos-base/libchrome-271506-r5" the versioned pattern yields
# ("chromeos-base/libchrome-271506", "r5") -- confirm that this is intended.
RE_EBUILD_WITHOUT_VERSION = (
    r'^'
    r'([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)'  # category/package
    r'$'
)
RE_EBUILD_WITH_VERSION = (
    r'^=?'  # optional leading "="
    r'([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)'  # category/package
    r'\-'
    r'([^\-]+(\-r\d+)?)'  # version with optional "-rN" revision
    r'$'
)
48
49
def ParseELFWithArgs(args):
    """Adapter that calls parseelf.ParseELF() with an unpacked argument tuple.

    multiprocessing.Pool.map hands a single argument to the mapped function,
    so the ParseELF() arguments travel packed together in one tuple.

    Args:
        args: The positional arguments for parseelf.ParseELF(). The element
            at index 1 (the relative path) is echoed back in the result.

    Returns:
        A 2-tuple (args[1], result of ParseELF()), or None when ParseELF()
        returns None.
    """
    parsed = parseelf.ParseELF(*args)
    return None if parsed is None else (args[1], parsed)
63
64
class DepTracker(object):
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the
    files in the root image.

    Call Init() first to populate the file list; the Compute*() methods then
    annotate those entries and SaveJSON() exports the result.
    """

    def __init__(
        self,
        root: Union[str, os.PathLike],
        jobs: int = 1):
        """Initializes the tracker for the given root directory.

        Args:
            root: Path to the directory holding the image contents.
            jobs: Number of processes used to parse ELF files. With 1 (the
                default) no process pool is created.

        Raises:
            Exception: If root is not a directory.
        """
        # Assigned before anything can raise so that __del__ always finds the
        # attribute, even on a partially constructed object.
        self._pool = None

        # TODO(vapier): Convert this to Path.
        root = str(root)
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise Exception('root (%s) must be a directory' % root)
        # Normalized to end in exactly one '/', so that relative paths can be
        # obtained by slicing off len(self._root) characters.
        self._root = root.rstrip('/') + '/'
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a
        # pool of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            # Pool is close()d in DepTracker's destructor.
            # pylint: disable=consider-using-with
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._imap = map

        # rel_path -> dict with 'size' and optionally 'deps', 'ftype',
        # 'ebuild'.
        self._files = {}
        # "category/pf" -> dict with 'size', 'files', 'atom', 'version'.
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are
        # assumed to point to the lowest lexicographically file with the same
        # inode.
        self._symlinks = {}
        self._hardlinks = {}

    def __del__(self):
        """Destructor method to free up self._pool resource."""
        # No pool exists when jobs == 1, and the attribute may be missing
        # altogether if the object was never fully initialized.
        pool = getattr(self, '_pool', None)
        if pool is not None:
            pool.close()

    def Init(self):
        """Generates the initial list of files.

        Walks the root directory and records the size of every entry that
        os.walk() reports as a file, registering symlinks and hardlinks on
        the way.
        """
        # Maps (st_dev, st_ino) to the first rel_path seen with that inode.
        # Inode numbers are only unique within a device, hence the pair.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root):]
                st = os.lstat(full_path)

                file_data = {
                    'size': st.st_size,
                }
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    if link_path.startswith('/'):
                        # Absolute targets are interpreted relative to the
                        # root.
                        target = link_path.lstrip('/')
                    else:
                        # lddtree's normpath handles a little more cases than
                        # the os.path version. In particular, it handles the
                        # '//' case.
                        target = lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path),
                                         link_path))
                    self._symlinks[rel_path] = target
                    file_data['deps'] = {
                        'symlink': [target],
                    }

                # Track hardlinks. Directories and files are visited in
                # sorted order, so the first path seen for an inode becomes
                # the target of the later ones.
                inode_key = (st.st_dev, st.st_ino)
                if inode_key in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[inode_key]
                else:
                    seen_inodes[inode_key] = rel_path

    def SaveJSON(self, filename):
        """Save the computed information to a JSON file.

        Args:
            filename: The destination JSON file.
        """
        data = {
            'files': self._files,
            'ebuilds': self._ebuilds,
        }
        # The context manager guarantees the file is flushed and closed.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(data, f)

    def ComputeEbuildDeps(self, sysroot):
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file
        in the root, the total size and number of files are stored
        internally.

        Args:
            sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning('PortageDB directory not found: %s',
                            portage_db.db_path)
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = '%s/%s' % (pkg.category, pkg.pf)
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't
                # copied to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if 'ebuild' in file_data:
                    logging.warning('Duplicated entry for %s: %s and %s',
                                    rel_path, file_data['ebuild'], cpf)
                # On duplicates the last package processed wins.
                file_data['ebuild'] = cpf
                pkg_size += file_data['size']
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                'size': pkg_size,
                'files': len(pkg_files),
                'atom': '%s/%s' % (pkg.category, pkg.package),
                'version': pkg.version,
            }
        # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self):
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files and stored internally; nothing is returned.

        Dependencies live in the 'deps' entry of each file's data, a dict
        mapping a dependency type to a list of paths. The types are:
          'ldd': Libraries listed as needed by the ELF file, added here.
          'symlink': The target of a symlink, added by Init().
        NOTE(review): a path is presumably None when a needed library was not
        found; that depends on what lddtree reports -- confirm.

        The file type ('ftype') of every parsed ELF file is filled in as
        well, to avoid parsing the file again later.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the regular files worth parsing. Symlinks and secondary
        # hardlinks are skipped.
        parseelf_args = []
        for rel_path in self._files:
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            full_path = os.path.join(self._root, rel_path)
            st = os.lstat(full_path)
            if not stat.S_ISREG(st.st_mode):
                continue
            parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive.
        # ParseELFWithArgs yields None for files without a ParseELF() result.
        elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                    if x is not None)

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at
            # this point to avoid re-parsing the ELF file later.
            if 'ftype' not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data['ftype'] = ftype

            file_deps = file_data.get('deps', {})
            # Dependencies based on the result of ldd.
            for lib in elf.get('needed', []):
                lib_path = elf['libs'][lib]['path']
                file_deps.setdefault('ldd', []).append(lib_path)

            # Only attach 'deps' when there is something to report.
            if file_deps:
                file_data['deps'] = file_deps

    def ComputeFileTypes(self):
        """Computes all the missing file type for the files in the root."""
        for rel_path, file_data in self._files.items():
            if 'ftype' in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data['ftype'] = ftype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700264
def ParseArgs(argv):
    """Parse the command line and return the frozen options object.

    Args:
        argv: The command line arguments to parse.

    Returns:
        The parsed options, after Freeze() has been called on them.
    """
    arg_parser = commandline.ArgumentParser()

    # Optional flags.
    arg_parser.add_argument(
        '-j',
        '--jobs',
        type=int,
        default=multiprocessing.cpu_count(),
        help='number of simultaneous jobs.',
    )
    arg_parser.add_argument(
        '--sysroot',
        type='path',
        metavar='SYSROOT',
        help='parse portage DB for ebuild information from the provided sysroot.',
    )
    arg_parser.add_argument(
        '--json',
        type='path',
        help='store information in JSON file.',
    )

    # Required positional argument.
    arg_parser.add_argument(
        'root',
        type='path',
        help='path to the directory where the rootfs is mounted.',
    )

    options = arg_parser.parse_args(argv)
    options.Freeze()
    return options
286
287
def main(argv):
    """Script entry point.

    Scans the root directory given on the command line, computes ELF
    dependencies and file types, optionally adds ebuild information from a
    sysroot and optionally writes everything to a JSON file.

    Args:
        argv: The command line arguments, passed on to ParseArgs().
    """
    options = ParseArgs(argv)
    logging.debug('Options are %s', options)

    tracker = DepTracker(options.root, jobs=options.jobs)
    tracker.Init()

    # File-level analysis always runs.
    tracker.ComputeELFFileDeps()
    tracker.ComputeFileTypes()

    # Ebuild information is only available when a sysroot was supplied.
    if options.sysroot:
        tracker.ComputeEbuildDeps(options.sysroot)

    # Export only on request.
    if options.json:
        tracker.SaveJSON(options.json)