blob: 9e276bc6ac58b23f24797c455817b2f0eea1ec18 [file] [log] [blame]
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide a particular feature: for
example chrome, shill or bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration files
that are read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
21import multiprocessing
22import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070023import stat
24
25from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070026from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070027from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070028from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070029from chromite.lib import portage_util
Mike Frysinger95452702021-01-23 00:07:22 -050030from chromite.third_party import lddtree
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031
32
# Regular expressions used to parse Gentoo atoms. They are meant to match
# ebuild names such as the ones below, separating the package name from the
# version.
#
# Names without a version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
#
# Names with a version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
46
47
def ParseELFWithArgs(args):
  """Single-argument adapter around parseelf.ParseELF().

  multiprocessing.Pool.map only hands one positional argument to the mapped
  callable, so the arguments for ParseELF() are packed into one sequence and
  expanded here.

  Args:
    args: Sequence of positional arguments forwarded to parseelf.ParseELF().
      Its second element (args[1]) is echoed back as the first item of the
      result.

  Returns:
    A 2-tuple (args[1], parsed_elf) where parsed_elf is whatever ParseELF()
    returned, or None when ParseELF() returned None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
61
62
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image. All results are accumulated in two dictionaries that
  SaveJSON() serializes:
    * files: maps each path (relative to the root) to a dict that always has a
      'size' key and may have 'deps', 'ftype' and 'ebuild' keys.
    * ebuilds: maps "category/pf" to a summary dict for each installed package
      that owns at least one file present in the root.
  """

  def __init__(self, root, jobs=1):
    """Prepares a tracker for the given root directory.

    Args:
      root: Path to the directory holding the image contents. It must be a
        real directory (a symlink to a directory is rejected because the
        check uses lstat()).
      jobs: Number of worker processes used for the ELF parsing step. A
        process pool is only created when this is greater than 1.

    Raises:
      Exception: If |root| is not a directory.
      OSError: If |root| can't be stat'ed.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Normalized to end with exactly one '/', so stripping len(self._root)
    # characters from a full path yields the relative path.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    # NOTE(review): the pool is never closed/joined explicitly; it lives for
    # as long as this object does.
    self._pool = None
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = map

    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are mapped to
    # the first path seen (in sorted walk order) for the same underlying file.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root directory and records, for every non-directory entry, its
    size as reported by lstat(). Symlinks additionally get a 'symlink'
    dependency on their target, and every path after the first one that refers
    to the same underlying file is recorded as a hardlink.
    """
    # Maps a (device, inode) pair to the first relative path seen for it.
    # The device is part of the key because inode numbers are only unique
    # within one filesystem and os.walk() crosses mount points.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted relative to the image root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {
              'symlink': [target],
          }

        # Track hardlinks.
        first_path = seen_inodes.setdefault((st.st_dev, st.st_ino), rel_path)
        if first_path != rel_path:
          self._hardlinks[rel_path] = first_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the output is flushed and closed even if
    # serialization fails part-way through.
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, sysroot):
    """Annotates files with the ebuild that installed them.

    Iterates over the packages installed in the portage database of |sysroot|
    and, for every file or symlink they own that is also present in the root,
    records the owning package in the file's 'ebuild' key. A per-package
    summary (total size, file count, atom and version) is stored internally
    for each package owning at least one such file. If the portage database
    directory doesn't exist a warning is logged and nothing is computed.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          # The last package listing the file wins; the clash is only logged.
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the load-time dependencies between ELF files.

    Every regular file in the root that is neither a symlink nor a secondary
    hardlink is handed to parseelf.ParseELF(). For each file that parses as
    ELF, the paths of the libraries listed as needed are appended to the
    'ldd' list inside the file's 'deps' dict, next to any 'symlink' entry
    added by Init(). The dependency types are:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The file is a symlink pointing to the listed path.
    Nothing is returned; the results are stored internally.

    NOTE(review): the path recorded for a needed library is whatever
    ParseELF() reports in elf['libs'][lib]['path']; presumably that is None
    when the library wasn't found -- confirm against parseelf.

    The file type of each parsed ELF is also filled in here, when not set
    yet, so the file doesn't have to be parsed again later.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the arguments for every candidate file: links were already
    # classified by Init(), and non-regular files can't be ELF files.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      # Only attach the 'deps' key when there is something to report.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700251
def ParseArgs(argv):
  """Builds the command line parser and parses |argv| with it.

  Args:
    argv: List of command line arguments to parse.

  Returns:
    The parsed options object, frozen before being returned.
  """
  arg_parser = commandline.ArgumentParser()

  # Optional flags.
  arg_parser.add_argument(
      '-j', '--jobs',
      type=int,
      default=multiprocessing.cpu_count(),
      help='number of simultaneous jobs.')
  arg_parser.add_argument(
      '--sysroot',
      type='path',
      metavar='SYSROOT',
      help='parse portage DB for ebuild information from the provided sysroot.')
  arg_parser.add_argument(
      '--json',
      type='path',
      help='store information in JSON file.')

  # Positional argument.
  arg_parser.add_argument(
      'root',
      type='path',
      help='path to the directory where the rootfs is mounted.')

  options = arg_parser.parse_args(argv)
  options.Freeze()
  return options
273
274
def main(argv):
  """Entry point: scans the root and optionally writes the JSON report.

  Args:
    argv: List of command line arguments, as accepted by ParseArgs().
  """
  options = ParseArgs(argv)
  logging.debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()

  # ELF dependencies go first: that step already fills in the file type of
  # every ELF file, leaving only the remaining files for ComputeFileTypes().
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild information is only gathered when a sysroot was provided.
  if options.sysroot:
    tracker.ComputeEbuildDeps(options.sysroot)

  # Likewise, the report is only written when a destination was requested.
  if options.json:
    tracker.SaveJSON(options.json)