blob: 547e9e54ea8de5242baef48f9e73bb274a0332a9 [file] [log] [blame]
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07001# Copyright 2014 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide a specific functionality: for
example chrome, shill or bluetoothd each provide a different feature that may
or may not be present on a given build. Many other files are dependencies of
these files and need to be present in the image for them to work. These
dependencies come from needed shared libraries, executed files and other
configuration files that are read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
19
Mike Frysinger383367e2014-09-16 15:06:17 -040020from __future__ import print_function
21
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070022import itertools
23import json
24import multiprocessing
25import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026import stat
27
28from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070029from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070030from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070031from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070032from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033from chromite.scripts import lddtree
34
35
# Regexes to parse Gentoo atoms. They match the following ebuild names,
# splitting the package name from the version.
#
# Without version (RE_EBUILD_WITHOUT_VERSION); group 1 is the whole
# "category/package" string:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
#
# With version (RE_EBUILD_WITH_VERSION); an optional leading "=" is accepted,
# group 1 is "category/package" and group 2 is the version including the
# optional "-rN" revision suffix (group 3 is that suffix alone):
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
49
50
def ParseELFWithArgs(args):
  """Adapter that calls parseelf.ParseELF() with a packed argument sequence.

  multiprocessing.Pool.map() hands every work item to the callee as a single
  object, so the positional arguments of ParseELF() travel together in |args|.

  Args:
    args: Sequence with the positional arguments for parseelf.ParseELF(). Its
      second item is the relative path that is echoed back in the result.

  Returns:
    A 2-tuple (args[1], elf) holding the passed relative path and the result of
    ParseELF(). When ParseELF() returns None (its error value), None is
    returned instead of a tuple.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
64
65
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image. The collected data lives in two dicts:

  * self._files maps the path of each file (relative to the root) to a dict
    with the keys 'size' (always), and 'deps', 'ftype' and 'ebuild' (only once
    the corresponding Compute*() method found something to record).
  * self._ebuilds maps "category/package-version" strings to a dict with the
    keys 'size', 'files', 'atom' and 'version'.

  Typical usage is Init() first, then any of the Compute*() methods, and
  finally SaveJSON().
  """

  def __init__(self, root, jobs=1):
    """Prepares a tracker for the passed root directory.

    Args:
      root: Path to the directory with the contents of the image.
      jobs: Number of processes used to parse ELF files. When it is 1 or less
        no process pool is created and the work runs in the calling process.

    Raises:
      Exception: If |root| is not a directory. The check uses lstat(), so a
        symlink pointing to a directory is rejected as well.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Keep exactly one trailing slash so relative paths can be computed by
    # slicing len(self._root) characters off a full path.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      # The builtin map() is available on every Python version, while
      # itertools.imap() only exists on Python 2.
      self._imap = map

    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root directory and records the size of every entry that
    os.walk() reports as a file name. Symlinks among them are resolved to a
    path relative to the root and stored both in self._symlinks and as a
    'symlink' dependency of the file. Entries sharing an inode with a
    previously seen entry are recorded in self._hardlinks.
    """
    # Maps an inode number to the first rel_path seen with it. Directories and
    # file names are visited in sorted order, so "first" is deterministic.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted inside the root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {
              'symlink': [target],
          }

        # Track hardlinks.
        if st.st_ino in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[st.st_ino]
        else:
          seen_inodes[st.st_ino] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    The file holds one object with the keys 'files' and 'ebuilds'.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # The context manager guarantees the output is flushed and closed, even if
    # the serialization fails half way.
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, sysroot):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, the total size and number of installed files is stored internally.
    If the portage database directory doesn't exist, a warning is logged and
    nothing else is done.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          # The last package listing the file wins; only warn about it.
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files and are
    stored in the 'deps' dict of each file entry, which maps a dependency type
    to the list of paths the file depends on. The dependency types are:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The file is a symlink to the listed path (set by Init()).
    The 'ldd' paths are stored exactly as reported by parseelf.ParseELF();
    presumably the path is None when a needed library wasn't found -- confirm
    against parseelf.

    As a side effect, the 'ftype' of every parsed ELF file is filled in if it
    wasn't set yet.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the arguments to parse every regular file that is neither a
    # symlink nor a hardlink to an already listed file.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    # ParseELFWithArgs() returns None for the files that couldn't be parsed.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      # Don't attach an empty 'deps' dict to files without dependencies.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root.

    Files that already have an 'ftype' are left untouched, and no 'ftype' key
    is added when the decoder returns a false value.
    """
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
253
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700254
def ParseArgs(argv):
  """Return parsed commandline arguments.

  Args:
    argv: List of command line arguments, without the program name.

  Returns:
    The frozen options object produced by the chromite argument parser, with
    the attributes jobs, sysroot, json and root.
  """
  # (flags, keyword arguments) for every add_argument() call, in the order in
  # which they are registered with the parser.
  arg_specs = (
      (('-j', '--jobs'),
       dict(type=int, default=multiprocessing.cpu_count(),
            help='number of simultaneous jobs.')),
      (('--sysroot',),
       dict(type='path', metavar='SYSROOT',
            help='parse portage DB for ebuild information from the provided '
                 'sysroot.')),
      (('--json',),
       dict(type='path',
            help='store information in JSON file.')),
      (('root',),
       dict(type='path',
            help='path to the directory where the rootfs is mounted.')),
  )

  parser = commandline.ArgumentParser()
  for flags, kwargs in arg_specs:
    parser.add_argument(*flags, **kwargs)

  options = parser.parse_args(argv)
  options.Freeze()
  return options
276
277
def main(argv):
  """Entry point of the script.

  Scans the root directory given on the command line, computes the ELF
  dependencies and the file types, and then runs the optional steps selected
  with --sysroot (ebuild information) and --json (save the results).

  Args:
    argv: List of command line arguments, without the program name.
  """
  options = ParseArgs(argv)
  logging.debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild information is only available when a sysroot was provided.
  if options.sysroot:
    tracker.ComputeEbuildDeps(options.sysroot)

  if options.json:
    tracker.SaveJSON(options.json)