# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide a specific feature: for
example chrome, shill or bluetoothd each provide functionality that may or may
not be present on a given build. Many other files are dependencies of these
files and need to be present in the image for them to work. These dependencies
come from needed shared libraries, executed files and other configuration
files that are read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
19
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070020import json
Chris McDonald59650c32021-07-20 15:29:28 -060021import logging
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070022import multiprocessing
23import os
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070024import stat
25
Chris McDonald59650c32021-07-20 15:29:28 -060026from chromite.third_party import lddtree
27
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070028from chromite.lib import commandline
Alex Deymo365b10c2014-08-25 13:14:28 -070029from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070030from chromite.lib import parseelf
Alex Deymoc99dd0b2014-09-09 16:15:17 -070031from chromite.lib import portage_util
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070032
33
# Regexes to parse Gentoo atoms. These should match the following ebuild names,
# splitting the package name from the version.
#  without version:
#    chromeos-base/tty
#    chromeos-base/libchrome-271506
#    sys-kernel/chromeos-kernel-3_8
#  with version:
#    chromeos-base/tty-0.0.1-r4
#    chromeos-base/libchrome-271506-r5
#    sys-kernel/chromeos-kernel-3_8-3.8.11-r35
#
# In both patterns group 1 captures the "category/package" name.
# RE_EBUILD_WITH_VERSION additionally accepts an optional leading "=", captures
# the version (including any "-rN" revision suffix) in group 2 and, when
# present, the "-rN" suffix alone in group 3.
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
47
48
def ParseELFWithArgs(args):
    """Single-argument adapter around parseelf.ParseELF().

    multiprocessing.Pool.map hands each work item to the callee as one
    argument, so the positional arguments of ParseELF() travel packed in a
    sequence and are unpacked here.

    Args:
      args: Sequence of positional arguments for parseelf.ParseELF(). The
        element at index 1 is the relative path that is echoed back.

    Returns:
      A 2-tuple (args[1], parsed ELF result), or None when ParseELF()
      returned None.
    """
    parsed = parseelf.ParseELF(*args)
    return None if parsed is None else (args[1], parsed)
62
63
class DepTracker(object):
    """Tracks dependencies and file information in a root directory.

    This class computes dependencies and other information related to the
    files in the root image. The collected data lives in two dicts:
      * self._files: rel_path -> dict with 'size' and, once computed, the
        optional keys 'deps', 'ftype' and 'ebuild'.
      * self._ebuilds: "category/pf" -> dict with 'size', 'files', 'atom' and
        'version'.
    """

    def __init__(self, root, jobs=1):
        """Prepares a tracker for the image rooted at |root|.

        Args:
          root: Path to the directory holding the root image.
          jobs: Number of worker processes used to parse ELF files. A pool is
            only created when this is greater than 1.

        Raises:
          Exception: If |root| is not a directory (symlinks are not followed).
        """
        root_st = os.lstat(root)
        if not stat.S_ISDIR(root_st.st_mode):
            raise Exception('root (%s) must be a directory' % root)
        # Normalize to exactly one trailing slash so that relative paths can
        # be obtained by slicing off len(self._root) characters.
        self._root = root.rstrip('/') + '/'
        self._file_type_decoder = filetype.FileTypeDecoder(root)

        # A wrapper to the multiprocess map function. We avoid launching a
        # pool of processes when jobs is 1 so python exceptions kill the main
        # process, useful for debugging.
        if jobs > 1:
            self._pool = multiprocessing.Pool(jobs)
            self._imap = self._pool.map
        else:
            self._imap = map

        self._files = {}
        self._ebuilds = {}

        # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are
        # assumed to point to the lowest lexicographically file with the same
        # inode.
        self._symlinks = {}
        self._hardlinks = {}

    def Init(self):
        """Generates the initial list of files.

        Walks the root in sorted order and records the size of every
        non-directory entry in self._files. Symlinks are additionally recorded
        in self._symlinks (and as a 'symlink' dependency of the file), and
        every path sharing an inode with an earlier path is recorded in
        self._hardlinks.
        """
        # Maps (device, inode) to the first rel_path seen for it. Inode
        # numbers are only unique within one device, so the device is part of
        # the key to avoid false hardlinks when the root spans filesystems.
        seen_inodes = {}
        for basepath, _, filenames in sorted(os.walk(self._root)):
            for filename in sorted(filenames):
                full_path = os.path.join(basepath, filename)
                rel_path = full_path[len(self._root):]
                st = os.lstat(full_path)

                file_data = {
                    'size': st.st_size,
                }
                self._files[rel_path] = file_data

                # Track symlinks.
                if stat.S_ISLNK(st.st_mode):
                    link_path = os.readlink(full_path)
                    if link_path.startswith('/'):
                        # Absolute targets are interpreted relative to the
                        # root.
                        target = link_path.lstrip('/')
                    else:
                        # lddtree's normpath handles a little more cases than
                        # the os.path version. In particular, it handles the
                        # '//' case.
                        target = lddtree.normpath(
                            os.path.join(os.path.dirname(rel_path),
                                         link_path))
                    self._symlinks[rel_path] = target
                    file_data['deps'] = {
                        'symlink': [target],
                    }

                # Track hardlinks.
                inode_key = (st.st_dev, st.st_ino)
                if inode_key in seen_inodes:
                    self._hardlinks[rel_path] = seen_inodes[inode_key]
                else:
                    seen_inodes[inode_key] = rel_path

    def SaveJSON(self, filename):
        """Save the computed information to a JSON file.

        Args:
          filename: The destination JSON file.
        """
        data = {
            'files': self._files,
            'ebuilds': self._ebuilds,
        }
        # Use a context manager so the file is flushed and closed
        # deterministically, even if serialization raises.
        with open(filename, 'w') as json_file:
            json.dump(data, json_file)

    def ComputeEbuildDeps(self, sysroot):
        """Compute the dependencies between ebuilds and files.

        Iterates over the list of ebuilds in the database and annotates the
        files with the ebuilds they are in. For each ebuild installing a file
        in the root, also records its total size, number of files, atom and
        version. Stores the information internally.

        Args:
          sysroot: The path to the sysroot, for example "/build/link".
        """
        portage_db = portage_util.PortageDB(sysroot)
        if not os.path.exists(portage_db.db_path):
            logging.warning('PortageDB directory not found: %s',
                            portage_db.db_path)
            return

        for pkg in portage_db.InstalledPackages():
            pkg_files = []
            pkg_size = 0
            cpf = '%s/%s' % (pkg.category, pkg.pf)
            for typ, rel_path in pkg.ListContents():
                # We ignore other entries like for example "dir".
                if typ not in (pkg.OBJ, pkg.SYM):
                    continue
                # We ignore files installed in the SYSROOT that weren't
                # copied to the image.
                if rel_path not in self._files:
                    continue
                pkg_files.append(rel_path)
                file_data = self._files[rel_path]
                if 'ebuild' in file_data:
                    logging.warning('Duplicated entry for %s: %s and %s',
                                    rel_path, file_data['ebuild'], cpf)
                # The last package listing the file wins.
                file_data['ebuild'] = cpf
                pkg_size += file_data['size']
            # Ignore packages that don't install any file.
            if not pkg_files:
                continue
            self._ebuilds[cpf] = {
                'size': pkg_size,
                'files': len(pkg_files),
                'atom': '%s/%s' % (pkg.category, pkg.package),
                'version': pkg.version,
            }
        # TODO(deymo): Parse dependencies between ebuilds.

    def ComputeELFFileDeps(self):
        """Computes the dependencies between files.

        Computes the dependencies between the files in the root directory
        passed during construction. The dependencies are inferred for ELF
        files and stored internally: the 'deps' entry of each file in
        self._files is a dict mapping a dependency type to the list of
        relative paths the file depends on. The dependency type is one of the
        following strings stating how the dependency was discovered:
          'ldd': The dependent ELF file is listed as needed in the dynamic
                 section.
          'symlink': The dependent file is a symlink to the depending.
        The path stored for a needed library is whatever the parsed ELF data
        reports as its 'path'.

        As a side effect the 'ftype' of every parsed ELF file is filled in if
        it was not set yet.
        """
        ldpaths = lddtree.LoadLdpaths(self._root)

        # Collect the regular files worth parsing: symlinks and hardlinks are
        # skipped since the file they refer to is handled on its own.
        parseelf_args = []
        for rel_path in self._files:
            if rel_path in self._symlinks or rel_path in self._hardlinks:
                continue

            full_path = os.path.join(self._root, rel_path)
            st = os.lstat(full_path)
            if not stat.S_ISREG(st.st_mode):
                continue
            parseelf_args.append((self._root, rel_path, ldpaths))

        # Parallelize the ELF lookup step since it is quite expensive.
        # ParseELFWithArgs() yields None for files that failed to parse.
        elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                    if x is not None)

        for rel_path, elf in elfs.items():
            file_data = self._files[rel_path]
            # Fill in the ftype if not set yet. We complete this value at this
            # point to avoid re-parsing the ELF file later.
            if 'ftype' not in file_data:
                ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
                if ftype:
                    file_data['ftype'] = ftype

            file_deps = file_data.get('deps', {})
            # Dependencies based on the result of ldd.
            for lib in elf.get('needed', []):
                lib_path = elf['libs'][lib]['path']
                file_deps.setdefault('ldd', []).append(lib_path)

            # Only attach 'deps' when there is something to report.
            if file_deps:
                file_data['deps'] = file_deps

    def ComputeFileTypes(self):
        """Computes all the missing file type for the files in the root."""
        for rel_path, file_data in self._files.items():
            if 'ftype' in file_data:
                continue
            ftype = self._file_type_decoder.GetType(rel_path)
            if ftype:
                file_data['ftype'] = ftype
251
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700252
def ParseArgs(argv):
    """Build the command line parser, parse |argv| and freeze the result.

    Args:
      argv: List of command line arguments (without the program name).

    Returns:
      The frozen options object produced by the chromite argument parser.
    """
    # (flags, keyword arguments) for each option, in registration order.
    argument_specs = (
        (('-j', '--jobs'),
         dict(type=int, default=multiprocessing.cpu_count(),
              help='number of simultaneous jobs.')),
        (('--sysroot',),
         dict(type='path', metavar='SYSROOT',
              help='parse portage DB for ebuild information from the provided '
                   'sysroot.')),
        (('--json',),
         dict(type='path',
              help='store information in JSON file.')),
        (('root',),
         dict(type='path',
              help='path to the directory where the rootfs is mounted.')),
    )

    arg_parser = commandline.ArgumentParser()
    for flags, settings in argument_specs:
        arg_parser.add_argument(*flags, **settings)

    options = arg_parser.parse_args(argv)
    options.Freeze()
    return options
274
275
def main(argv):
    """Entry point: scan the root, compute all information and save it.

    Args:
      argv: List of command line arguments (without the program name).
    """
    options = ParseArgs(argv)
    logging.debug('Options are %s', options)

    tracker = DepTracker(options.root, jobs=options.jobs)

    # The file list must exist before ELF parsing, and ELF parsing runs before
    # the generic file type pass so ELF files are not inspected twice.
    for step in (tracker.Init,
                 tracker.ComputeELFFileDeps,
                 tracker.ComputeFileTypes):
        step()

    # Ebuild information is only gathered when a sysroot was provided.
    if options.sysroot:
        tracker.ComputeEbuildDeps(options.sysroot)

    # Likewise, results are only written out when a destination was given.
    if options.json:
        tracker.SaveJSON(options.json)
291 dt.SaveJSON(opts.json)