blob: 168e775de893f2c0b228e6c9d0af7b9de64565d2 [file] [log] [blame]
Mike Frysingere58c0e22017-10-04 15:43:30 -04001# -*- coding: utf-8 -*-
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07002# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Mike Frysinger750c5f52014-09-16 16:16:57 -04006"""Script to discover dependencies and other file information from a build.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -07007
Some files in the image are installed to provide a specific feature: for
example chrome, shill and bluetoothd each provide functionality that may or
may not be present on a given build. Many other files are dependencies of
these files and need to be present in the image for them to work. These
dependencies come from needed shared libraries, executed files and
configuration files that are read.
14
15This script currently discovers dependencies between ELF files for libraries
16required at load time (libraries loaded by the dynamic linker) but not
Alex Deymo365b10c2014-08-25 13:14:28 -070017libraries loaded at runtime with dlopen(). It also computes size and file type
18in several cases to help understand the contents of the built image.
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070019"""
20
Mike Frysinger383367e2014-09-16 15:06:17 -040021from __future__ import print_function
22
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070023import json
24import multiprocessing
25import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070026import stat
27
Mike Frysinger81a5ea62019-08-25 14:12:20 -040028from six.moves import map as imap
29
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070030from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070031from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070032from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070034from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070035from chromite.scripts import lddtree
36
37
# Regular expressions for Gentoo atoms. Between them they split the
# "category/package" part of an ebuild name from its (optional) version.
#
# Names without a version (RE_EBUILD_WITHOUT_VERSION):
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# Names with a version (RE_EBUILD_WITH_VERSION); group 1 is the package name,
# group 2 the version including any "-rN" revision suffix:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)'
    r'\-([^\-]+(\-r\d+)?)$')
51
52
def ParseELFWithArgs(args):
  """Adapter calling parseelf.ParseELF() with an unpacked argument tuple.

  multiprocessing.Pool.map hands a single argument to the mapped callable, so
  the real arguments travel packed together in |args|.

  Args:
    args: Sequence of positional arguments forwarded to parseelf.ParseELF().
      Its second item is the relative path echoed back in the result.

  Returns:
    A 2-tuple (args[1], result of ParseELF()), or None when ParseELF()
    returned None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
66
67
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.
  """

  def __init__(self, root, jobs=1):
    """Prepares a tracker for the files under |root|.

    Args:
      root: Path to the directory to scan. It is checked with lstat(), so a
        symlink pointing to a directory is rejected.
      jobs: Number of processes used to parse ELF files. When it is 1 (the
        default) no process pool is created.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Always keep exactly one trailing slash so that relative paths can be
    # obtained by slicing off len(self._root) characters.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = imap

    # Per-file information keyed by path relative to the root, and per-ebuild
    # information keyed by "category/package-version".
    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root directory and records the size of every entry that
    os.walk() reports as a file. Symlinks are additionally recorded with their
    target as a 'symlink' dependency, and every path after the first one that
    shares the same underlying file is recorded as a hardlink to that first
    path.
    """
    # First path seen for each underlying file. The key includes the device
    # number because inode numbers are only unique within one filesystem.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted relative to the root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {
              'symlink': [target],
          }

        # Track hardlinks.
        inode_key = (st.st_dev, st.st_ino)
        if inode_key in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[inode_key]
        else:
          seen_inodes[inode_key] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the file is flushed and closed even if the
    # serialization fails.
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, sysroot):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing at least one file
    in the root, stores its total size, number of files, atom and version.
    Returns early, after logging a warning, if the portage database directory
    does not exist.

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        # On duplicates the last package listed wins.
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files and are
    stored internally; nothing is returned.

    For every file that was parsed as ELF, the 'deps' entry of its file
    information is a dict mapping a dependency type to a list of paths. The
    dependency types are:
      'ldd': One entry per library listed as needed by the ELF file, taken
        from the 'path' recorded for that library by the ELF parser.
      'symlink': Set by Init() for symlinks; the target of the symlink.

    NOTE(review): presumably the path of a needed library is None when the
    library wasn't found; confirm against parseelf.ParseELF().

    The file type of the parsed ELF files is filled in as well, when it can
    be determined, to avoid re-parsing the ELF file later.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the regular files that are neither symlinks nor hardlinks;
    # those are the only candidates for ELF parsing.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive. Files that
    # failed to parse produce None and are dropped here.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      # Only attach 'deps' when there is something to report.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700256
def ParseArgs(argv):
  """Parses |argv| and returns the frozen options object.

  Args:
    argv: List of command line arguments, without the program name.

  Returns:
    The parsed options, after Freeze() has been called on them.
  """
  # Each entry is (names, keyword settings) for one add_argument() call. They
  # are registered in this order: optional flags first, then the positional.
  argument_table = (
      (('-j', '--jobs'),
       dict(type=int, default=multiprocessing.cpu_count(),
            help='number of simultaneous jobs.')),
      (('--sysroot',),
       dict(type='path', metavar='SYSROOT',
            help='parse portage DB for ebuild information from the provided '
                 'sysroot.')),
      (('--json',),
       dict(type='path',
            help='store information in JSON file.')),
      (('root',),
       dict(type='path',
            help='path to the directory where the rootfs is mounted.')),
  )

  arg_parser = commandline.ArgumentParser()
  for names, settings in argument_table:
    arg_parser.add_argument(*names, **settings)

  options = arg_parser.parse_args(argv)
  options.Freeze()
  return options
278
279
def main(argv):
  """Entry point: scans the rootfs and optionally dumps the result as JSON.

  Args:
    argv: List of command line arguments, without the program name.
  """
  options = ParseArgs(argv)
  logging.debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()

  # ELF parsing runs first so the file types it discovers are not recomputed.
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Both of the remaining steps only run when their option was given.
  if options.sysroot:
    tracker.ComputeEbuildDeps(options.sysroot)

  if options.json:
    tracker.SaveJSON(options.json)