blob: a99f7e7aefa5561aaf74cead3337a31c3ac7ce27 [file] [log] [blame]
Mike Frysingere58c0e22017-10-04 15:43:30 -04001# -*- coding: utf-8 -*-
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07002# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Mike Frysinger750c5f52014-09-16 16:16:57 -04006"""Script to discover dependencies and other file information from a build.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07007
Some files in the image are installed to provide a specific feature; for
example chrome, shill or bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration files
read.
14
15This script currently discovers dependencies between ELF files for libraries
16required at load time (libraries loaded by the dynamic linker) but not
Alex Deymo365b10c2014-08-25 13:14:28 -070017libraries loaded at runtime with dlopen(). It also computes size and file type
18in several cases to help understand the contents of the built image.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070019"""
20
Mike Frysinger383367e2014-09-16 15:06:17 -040021from __future__ import print_function
22
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070023import itertools
24import json
25import multiprocessing
26import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070027import stat
28
29from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070030from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070031from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070032from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070033from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070034from chromite.scripts import lddtree
35
36
# Regular expressions used to parse Gentoo atoms, separating the package name
# from the (optional) version. Example ebuild names they are meant to handle:
#   without version:
#     chromeos-base/tty
#     chromeos-base/libchrome-271506
#     sys-kernel/chromeos-kernel-3_8
#   with version:
#     chromeos-base/tty-0.0.1-r4
#     chromeos-base/libchrome-271506-r5
#     sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)'  # Optional '=', category/package.
    r'\-([^\-]+(\-r\d+)?)$')                # Version with optional -rN.
50
51
def ParseELFWithArgs(args):
  """Adapter that calls parseelf.ParseELF() with a packed argument tuple.

  multiprocessing.Pool.map hands a single argument to the mapped function, so
  the positional arguments for ParseELF() travel packed together in |args|.

  Args:
    args: Sequence of positional arguments forwarded to parseelf.ParseELF().
      Its second element (the relative path) is echoed back in the result.

  Returns:
    A 2-tuple (args[1], parsed_elf) when ParseELF() produced a result, or None
    when ParseELF() returned None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
65
66
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image. The collected data lives in two dictionaries:
    _files: maps each file's path (relative to the root) to a dict with its
      'size' and, when known, 'deps', 'ftype' and 'ebuild'.
    _ebuilds: maps "category/pf" to a summary dict of the package.
  """

  def __init__(self, root, jobs=1):
    """Prepares a tracker for the files found under |root|.

    Args:
      root: Path to the directory holding the image contents.
      jobs: Number of worker processes used to parse ELF files. When it is 1
        (the default) no process pool is created.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Normalized to end with exactly one '/' so relative paths can be obtained
    # by slicing off this prefix.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      # itertools.imap only exists on Python 2; the builtin map is its lazy
      # equivalent on Python 3.
      self._imap = getattr(itertools, 'imap', map)

    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root directory recording the size of every file entry reported
    by os.walk(), and registers symlinks and hardlinks along the way.
    """
    # Maps (device, inode) to the first rel_path seen with that identity.
    # The device is part of the key because inode numbers are only unique
    # within a single filesystem.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {'size': st.st_size}
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted relative to the image root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {'symlink': [target]}

        # Track hardlinks: any later path sharing an already seen inode is
        # recorded as a hardlink to the first path.
        inode_key = (st.st_dev, st.st_ino)
        if inode_key in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[inode_key]
        else:
          seen_inodes[inode_key] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # The context manager guarantees the file is flushed and closed.
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, sysroot):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, stores a summary (total size, number of files, atom and version)
    internally. If the portage database directory does not exist, a warning is
    logged and nothing is computed.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        # The last package listing the file wins.
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files and
    stored internally: the 'deps' entry of each file is a dict whose keys are
    the way the dependency was discovered and whose values are lists of paths
    relative to the root:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The file is a symlink to the listed path (filled by Init()).
    The 'ldd' paths are whatever the ELF parser reports for each needed
    library.
    NOTE(review): presumably the reported path is None when the library wasn't
    found -- confirm against parseelf.ParseELF().

    As a side effect, the 'ftype' of every parsed ELF file is filled in when it
    was not set yet.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the regular files that are neither symlinks nor hardlinks; those
    # are the candidates handed to the ELF parser.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      if not stat.S_ISREG(os.lstat(full_path).st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive. Entries
    # for which the parser returned nothing are dropped.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        file_deps.setdefault('ldd', []).append(elf['libs'][lib]['path'])

      # Only attach 'deps' when there is something to report.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
254
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700255
def ParseArgs(argv):
  """Parse the command line and return the frozen options object.

  Args:
    argv: List of command line arguments, without the program name.

  Returns:
    The parsed options, frozen so they can't be modified afterwards.
  """
  parser = commandline.ArgumentParser()

  # Optional flags.
  parser.add_argument('-j', '--jobs',
                      type=int,
                      default=multiprocessing.cpu_count(),
                      help='number of simultaneous jobs.')
  parser.add_argument('--sysroot',
                      type='path',
                      metavar='SYSROOT',
                      help='parse portage DB for ebuild information from the '
                           'provided sysroot.')
  parser.add_argument('--json',
                      type='path',
                      help='store information in JSON file.')

  # Positional argument.
  parser.add_argument('root',
                      type='path',
                      help='path to the directory where the rootfs is '
                           'mounted.')

  options = parser.parse_args(argv)
  options.Freeze()
  return options
277
278
def main(argv):
  """Entry point of the script.

  Builds a DepTracker for the requested root, computes ELF dependencies and
  file types, optionally adds ebuild information, and optionally writes the
  result to a JSON file.

  Args:
    argv: List of command line arguments, without the program name.
  """
  options = ParseArgs(argv)
  logging.debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild data is only gathered when a sysroot was given.
  if options.sysroot:
    tracker.ComputeEbuildDeps(options.sysroot)

  # Output is only written when a destination was given.
  if options.json:
    tracker.SaveJSON(options.json)