blob: 8aa24772a7f10373ca5cd9249f566d148426b612 [file] [log] [blame]
Mike Frysingere58c0e22017-10-04 15:43:30 -04001# -*- coding: utf-8 -*-
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07002# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide specific functionality: for
example chrome, shill or bluetoothd each provide a feature that may or may not
be present on a given build. Many other files are dependencies of these files
and need to be present in the image for them to work. These dependencies come
from needed shared libraries, executed files and other configuration files
that are read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
20
Mike Frysinger383367e2014-09-16 15:06:17 -040021from __future__ import print_function
22
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070023import json
24import multiprocessing
25import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026import stat
Mike Frysinger03b983f2020-02-21 02:31:49 -050027import sys
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070028
Mike Frysinger81a5ea62019-08-25 14:12:20 -040029from six.moves import map as imap
30
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070032from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070033from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070034from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070035from chromite.lib import portage_util
Mike Frysinger95452702021-01-23 00:07:22 -050036from chromite.third_party import lddtree
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070037
38
# Fail early, at import time, when run under an unsupported interpreter.
assert sys.version_info >= (3, 6), 'This module requires Python 3.6+'


# Regexes to parse Gentoo atoms. They should match the following ebuild names,
# splitting the package name from the version.
#  without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
#  with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
#
# In both patterns group 1 is the "category/package" part.
# RE_EBUILD_WITH_VERSION additionally accepts an optional leading "=" and
# captures the version (including the optional "-rN" revision suffix) in
# group 2; group 3 is just the "-rN" suffix when present.
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
55
56
def ParseELFWithArgs(args):
  """Single-argument adapter around parseelf.ParseELF().

  Pool.map-style functions hand exactly one item to the callable, so the
  positional arguments for ParseELF() are packed into one tuple and unpacked
  here.

  Args:
    args: Sequence of positional arguments forwarded to parseelf.ParseELF().
      The element at index 1 is echoed back in the result; the caller in this
      file passes the file's path relative to the root in that position.

  Returns:
    A 2-tuple (args[1], parsed_elf) when ParseELF() produced a result, or None
    when ParseELF() returned None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
70
71
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.

  Typical usage is to call Init() first to populate the file list, then any of
  the Compute*() methods, and finally SaveJSON() to dump the results.
  """

  def __init__(self, root, jobs=1):
    """Prepares a tracker for the given root directory.

    Args:
      root: Path to the root directory to inspect. It must be a real
        directory; the check uses lstat(), so a symlink is rejected.
      jobs: Number of worker processes used to parse ELF files. A process pool
        is only created when this is greater than 1.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Always keep exactly one trailing slash so relative paths can be obtained
    # by slicing off len(self._root) characters.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = imap

    # rel_path -> dict with the information gathered for that file ('size',
    # and optionally 'deps', 'ftype' and 'ebuild').
    self._files = {}
    # "category/pf" -> dict with the per-package summary.
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lexicographically lowest file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root directory and records, for every entry os.walk() reports as
    a file, its size. Symlinks additionally get a 'symlink' dependency on their
    (root-relative) target, and files sharing an inode with a previously seen
    file are recorded as hardlinks to that first file.
    """
    # Maps (st_dev, st_ino) to the first rel_path seen with that identity.
    # Inode numbers are only unique within one device, so the device is part
    # of the key to avoid reporting bogus hardlinks across mount points.
    seen_inodes = {}
    # Directories and file names are visited in sorted order so the "first"
    # path for an inode is the lexicographically lowest one.
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted relative to the image root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {
              'symlink': [target],
          }

        # Track hardlinks.
        inode_key = (st.st_dev, st.st_ino)
        if inode_key in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[inode_key]
        else:
          seen_inodes[inode_key] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the file is flushed and closed deterministically
    # instead of whenever the file object happens to be garbage collected.
    with open(filename, 'w') as f:
      json.dump(data, f)

  def ComputeEbuildDeps(self, sysroot):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, a summary entry (size, number of files, atom and version) is stored
    internally.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        # The last package listing the file wins.
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files.

    The result is stored in the 'deps' entry of each file's information dict.
    That entry is a dict keyed by the way the dependency was discovered, whose
    values are lists of dependency paths:
      'ldd': The ELF file lists the library as needed in its dynamic section.
      'symlink': The file is a symlink pointing to the listed path (filled in
        by Init()).
    NOTE(review): the path stored for an 'ldd' dependency is whatever the
    parsed ELF data reports for that library; presumably it is None when the
    library could not be located -- confirm against parseelf/lddtree.

    While the parsed ELF data is at hand, the 'ftype' of each ELF file is also
    filled in if it was not set yet.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the candidates to parse: regular files that are neither symlinks
    # nor hardlinks to an already tracked file.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    # ParseELFWithArgs() yields None for files that could not be parsed.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      # Only attach 'deps' when there is something to report.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
259
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700260
def ParseArgs(argv):
  """Build the command line parser and parse |argv| with it.

  Args:
    argv: The list of command line arguments to parse.

  Returns:
    The parsed options object, after Freeze() has been called on it.
  """
  arg_parser = commandline.ArgumentParser()
  add_arg = arg_parser.add_argument

  # Optional flags.
  add_arg('-j', '--jobs', type=int, default=multiprocessing.cpu_count(),
          help='number of simultaneous jobs.')
  add_arg('--sysroot', type='path', metavar='SYSROOT',
          help='parse portage DB for ebuild information from the provided '
               'sysroot.')
  add_arg('--json', type='path',
          help='store information in JSON file.')

  # Required positional argument.
  add_arg('root', type='path',
          help='path to the directory where the rootfs is mounted.')

  options = arg_parser.parse_args(argv)
  options.Freeze()
  return options
282
283
def main(argv):
  """Entry point: scan the root, compute dependencies and emit the results.

  Args:
    argv: The list of command line arguments, as accepted by ParseArgs().
  """
  options = ParseArgs(argv)
  logging.debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)

  # The file list must exist before any of the Compute*() passes run. ELF
  # parsing goes first so it can fill in file types as a by-product.
  tracker.Init()
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild information is only available when a sysroot was given.
  if options.sysroot:
    tracker.ComputeEbuildDeps(options.sysroot)

  if options.json:
    tracker.SaveJSON(options.json)