blob: 9e0d1a290df7aa89469bdf7798d6eab5ce818b9a [file] [log] [blame]
# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide a specific feature: chrome,
shill and bluetoothd, for example, each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration files
read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
21import multiprocessing
22import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070023import stat
24
Mike Frysinger81a5ea62019-08-25 14:12:20 -040025from six.moves import map as imap
26
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070027from chromite.lib import commandline
Ralph Nathan5a582ff2015-03-20 18:18:30 -070028from chromite.lib import cros_logging as logging
Alex Deymo365b10c2014-08-25 13:14:28 -070029from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070030from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070031from chromite.lib import portage_util
Mike Frysinger95452702021-01-23 00:07:22 -050032from chromite.third_party import lddtree
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070033
34
# Regexes to parse Gentoo atoms. These should match the following ebuild names,
# splitting the package name from the version.
#   without version:
#     chromeos-base/tty
#     chromeos-base/libchrome-271506
#     sys-kernel/chromeos-kernel-3_8
#   with version:
#     chromeos-base/tty-0.0.1-r4
#     chromeos-base/libchrome-271506-r5
#     sys-kernel/chromeos-kernel-3_8-3.8.11-r35
# Both are plain pattern strings (not compiled); group 1 is "category/package"
# and, for the versioned form, group 2 is the version including any "-rN"
# revision suffix.
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
48
49
def ParseELFWithArgs(args):
  """Wrapper to parseelf.ParseELF accepting a single arg.

  This wrapper is required to use the multiprocessing.Pool.map function, which
  hands exactly one item to the mapped callable.

  Args:
    args: Sequence of positional arguments forwarded unchanged to
      parseelf.ParseELF(). The second item (args[1]) is echoed back in the
      result; DepTracker passes (root, rel_path, ldpaths).

  Returns:
    A 2-tuple with the passed relative path and the result of ParseELF(). On
    error, when ParseELF() returns None, this function returns None.
  """
  elf = parseelf.ParseELF(*args)
  if elf is None:
    return None
  return args[1], elf
63
64
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image. Results accumulate in two dicts, which SaveJSON() writes
  out under the keys 'files' and 'ebuilds':
    files: rel_path -> dict with 'size' and, when known, 'deps', 'ftype' and
      'ebuild'.
    ebuilds: 'category/pf' -> dict with 'size', 'files', 'atom' and 'version'.
  """

  def __init__(self, root, jobs=1):
    """Prepares an empty tracker for |root|; call Init() to scan the files.

    Args:
      root: Path to the root directory. It is checked with lstat(), so a
        symlink pointing to a directory is rejected.
      jobs: Number of worker processes used for ELF parsing. With 1 (or less)
        no process pool is created and the work runs in this process.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    # Always keep exactly one trailing slash so that stripping len(self._root)
    # characters from a full path yields the relative path.
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = imap

    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. A hardlink maps to the
    # first path seen for the same file while walking the root in sorted
    # order.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files.

    Walks the root and records every non-directory entry in self._files with
    its lstat() size. Symlinks are additionally recorded in self._symlinks
    (with a 'symlink' dependency on their target) and repeated occurrences of
    the same file in self._hardlinks.
    """
    # Maps (st_dev, st_ino) to the first rel_path seen for that file. The
    # device is part of the key because inode numbers are only unique within a
    # single filesystem.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {'size': st.st_size}
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          if link_path.startswith('/'):
            # Absolute targets are interpreted relative to the root.
            target = link_path.lstrip('/')
          else:
            # lddtree's normpath handles a little more cases than the os.path
            # version. In particular, it handles the '//' case.
            target = lddtree.normpath(
                os.path.join(os.path.dirname(rel_path), link_path))
          self._symlinks[rel_path] = target
          file_data['deps'] = {'symlink': [target]}

        # Track hardlinks.
        inode_key = (st.st_dev, st.st_ino)
        if inode_key in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[inode_key]
        else:
          seen_inodes[inode_key] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the file is flushed and closed even if
    # serialization fails part way through.
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, sysroot):
    """Annotate files with the ebuild that installed them.

    Iterates over the packages installed in the portage database of |sysroot|
    and, for every installed object or symlink that is also present in the
    root, records the owning ebuild in the file's 'ebuild' entry. For each
    ebuild owning at least one such file, stores its total size, file count,
    atom and version in self._ebuilds. Dependencies between ebuilds are not
    computed yet (see the TODO below).

    Args:
      sysroot: The path to the sysroot, for example "/build/link".
    """
    portage_db = portage_util.PortageDB(sysroot)
    if not os.path.exists(portage_db.db_path):
      logging.warning('PortageDB directory not found: %s', portage_db.db_path)
      return

    for pkg in portage_db.InstalledPackages():
      pkg_files = []
      pkg_size = 0
      cpf = '%s/%s' % (pkg.category, pkg.pf)
      for typ, rel_path in pkg.ListContents():
        # We ignore other entries like for example "dir".
        if typ not in (pkg.OBJ, pkg.SYM):
          continue
        # We ignore files installed in the SYSROOT that weren't copied to the
        # image.
        if rel_path not in self._files:
          continue
        pkg_files.append(rel_path)
        file_data = self._files[rel_path]
        if 'ebuild' in file_data:
          logging.warning('Duplicated entry for %s: %s and %s',
                          rel_path, file_data['ebuild'], cpf)
        # The last package listing the file wins.
        file_data['ebuild'] = cpf
        pkg_size += file_data['size']
      # Ignore packages that don't install any file.
      if not pkg_files:
        continue
      self._ebuilds[cpf] = {
          'size': pkg_size,
          'files': len(pkg_files),
          'atom': '%s/%s' % (pkg.category, pkg.package),
          'version': pkg.version,
      }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files and
    stored in place in each file's entry of self._files under the 'deps' key,
    a dict mapping a dependency type to a list of paths. The types are:
      'ldd': The ELF file lists the library as needed in its dynamic section.
      'symlink': The file is a symlink to the listed path (filled by Init()).
    The 'ldd' paths are stored exactly as reported by the ELF parser.
    NOTE(review): the parser presumably reports None for a library that
    wasn't found -- confirm against parseelf.ParseELF().

    The file type of every parsed ELF file is also recorded under 'ftype' at
    this point, to avoid re-parsing the ELF file later.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Select the files to hand to the ELF parser: regular files only, skipping
    # everything Init() recorded as a symlink or as a hardlink.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      if not stat.S_ISREG(os.lstat(full_path).st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive. Files for
    # which the wrapper returned None are dropped.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.items():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        file_deps.setdefault('ldd', []).append(elf['libs'][lib]['path'])

      # Only attach 'deps' when there is something to report.
      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.items():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
252
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700253
def ParseArgs(argv):
  """Return parsed commandline arguments.

  Args:
    argv: The list of command line arguments to parse.

  Returns:
    The parsed options object, frozen before being returned. It carries the
    attributes jobs, sysroot, json and root.
  """
  parser = commandline.ArgumentParser()

  # Optional arguments.
  parser.add_argument(
      '-j', '--jobs', type=int, default=multiprocessing.cpu_count(),
      help='number of simultaneous jobs.')
  parser.add_argument(
      '--sysroot', type='path', metavar='SYSROOT',
      help='parse portage DB for ebuild information from the provided sysroot.')
  parser.add_argument(
      '--json', type='path',
      help='store information in JSON file.')

  # Positional arguments.
  parser.add_argument(
      'root', type='path',
      help='path to the directory where the rootfs is mounted.')

  opts = parser.parse_args(argv)
  opts.Freeze()
  return opts
275
276
def main(argv):
  """Main function to start the script.

  Scans the root given on the command line, computes ELF dependencies and
  file types, optionally annotates files with ebuild information (--sysroot)
  and optionally writes the result to a JSON file (--json).

  Args:
    argv: The list of command line arguments, forwarded to ParseArgs().
  """
  opts = ParseArgs(argv)
  logging.debug('Options are %s', opts)

  dt = DepTracker(opts.root, jobs=opts.jobs)
  dt.Init()

  # ELF parsing runs first: it fills in the file type of the ELF files it
  # parses, so ComputeFileTypes() only has to handle the remaining files.
  dt.ComputeELFFileDeps()
  dt.ComputeFileTypes()

  if opts.sysroot:
    dt.ComputeEbuildDeps(opts.sysroot)

  if opts.json:
    dt.SaveJSON(opts.json)