# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5"""\
6Script to discover dependencies and other file information from a build.
7
Some files in the image are installed to provide some functionality; for
example, chrome, shill and bluetoothd each provide functionality that can be
present or not on a given build. Many other files are dependencies of those
files and need to be present in the image for them to work. These dependencies
include needed shared libraries, executed files and other configuration files
that are read.
14
15This script currently discovers dependencies between ELF files for libraries
16required at load time (libraries loaded by the dynamic linker) but not
17libraries loaded at runtime with dlopen().
18"""
19
20import itertools
21import json
22import multiprocessing
23import os
24import re
25import stat
26
27from chromite.lib import commandline
28from chromite.lib import cros_build_lib
29from chromite.lib import parseelf
30from chromite.scripts import lddtree
31
32
# Regexes to parse Gentoo atoms (as found under /var/db/pkg), splitting the
# package name from the version. They should match the following ebuild
# names.
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version (the version group includes the '-rN' revision suffix when
# present; an optional leading '=' is also accepted):
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
46
47
def ParseELFWithArgs(args):
  """Wrapper to parseelf.ParseELF accepting a single arg.

  This wrapper is required to use multiprocessing.Pool.map function.

  Args:
    args: The positional arguments for parseelf.ParseELF(), as a tuple whose
        second element is the relative path being parsed.

  Returns:
    A 2-tuple with the passed relative path and the result of ParseELF(). On
    error, when ParseELF() returns None, this function returns None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
61
62
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.
  """

  def __init__(self, root, jobs=1):
    """Initializes the tracker over the passed rootfs directory.

    Args:
      root: Path to the rootfs directory. Must be a directory.
      jobs: Number of parallel processes used for the expensive ELF parsing
          step. With jobs=1 no extra process is launched.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    self._root = root.rstrip('/') + '/'

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = itertools.imap

    # Maps a file's relative path to a dict of attributes about it ('size',
    # and optionally 'deps' and 'ebuild'). Populated by Init().
    self._files = {}
    # Maps an ebuild name to a dict of attributes ('size', 'files', 'atom',
    # 'version'). Populated by ComputeEbuildDeps().
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files."""
    # First iteration over all the files in root searching for symlinks and
    # non-regular files.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          # lddtree's normpath handles a little more cases than the os.path
          # version. In particular, it handles the '//' case.
          self._symlinks[rel_path] = (
              link_path.lstrip('/') if link_path and link_path[0] == '/' else
              lddtree.normpath(os.path.join(os.path.dirname(rel_path),
                                            link_path)))
          file_data['deps'] = {
              'symlink': [self._symlinks[rel_path]]
          }

        # Track hardlinks: every path sharing an inode maps to the first
        # (lowest lexicographically) path seen with that inode.
        if st.st_ino in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[st.st_ino]
          continue
        seen_inodes[st.st_ino] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the output file is flushed and closed even if
    # the serialization fails (the previous code left the file object open).
    with open(filename, 'w') as outfile:
      json.dump(data, outfile)

  def ComputeEbuildDeps(self, portage_db):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, also compute the direct dependencies. Stores the information
    internally.

    Args:
      portage_db: The path to the portage db. Usually "/var/db/pkg".
    """
    portage_db = portage_db.rstrip('/') + '/'
    for basepath, _, filenames in sorted(os.walk(portage_db)):
      if 'CONTENTS' not in filenames:
        continue
      full_path = os.path.join(basepath, 'CONTENTS')
      pkg = basepath[len(portage_db):]
      pkg_files = []
      pkg_size = 0
      with open(full_path) as contents:
        for line in contents:
          fields = line.split()
          # Line format is: "type file_path [more space-separated fields]".
          # Discard any other line without at least the first two fields. The
          # remaining fields depend on the type.
          if len(fields) < 2:
            continue
          typ, file_path = fields[:2]
          # We ignore other entries like for example "dir".
          if typ not in ('obj', 'sym'):
            continue
          file_path = file_path.lstrip('/')
          # We ignore files installed in the SYSROOT that weren't copied to
          # the image.
          if file_path not in self._files:
            continue
          pkg_files.append(file_path)
          file_data = self._files[file_path]
          if 'ebuild' in file_data:
            # NOTE: the last placeholder used to be a bare '%', which raised
            # ValueError whenever this warning actually fired.
            cros_build_lib.Warning('Duplicated entry for %s: %s and %s',
                                   file_path, file_data['ebuild'], pkg)
          file_data['ebuild'] = pkg
          pkg_size += file_data['size']
      if pkg_files:
        pkg_atom = pkg
        pkg_version = None
        m = re.match(RE_EBUILD_WITHOUT_VERSION, pkg)
        if m:
          pkg_atom = m.group(1)
        m = re.match(RE_EBUILD_WITH_VERSION, pkg)
        if m:
          pkg_atom = m.group(1)
          pkg_version = m.group(2)
        self._ebuilds[pkg] = {
            'size': pkg_size,
            'files': len(pkg_files),
            'atom': pkg_atom,
            'version': pkg_version,
        }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files.
    The list of dependencies for each file in the passed rootfs as a dict().
    The result's keys are the relative path of the files and the value of each
    file is a list of dependencies. A dependency is a tuple (dep_path,
    dep_type) where the dep_path is relative path from the passed root to the
    dependent file and dep_type is one of the following strings stating how
    the dependency was discovered:
      'ldd': The dependent ELF file is listed as needed in the dynamic
          section.
      'symlink': The dependent file is a symlink to the depending.
    If there are dependencies of a given type whose target file wasn't
    determined, a tuple (None, dep_type) is included. This is the case for
    example if a program uses a library that wasn't found.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the regular files to parse, skipping symlinks, hardlink
    # duplicates and other non-regular files.
    parseelf_args = []
    for rel_path in self._files:
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.iteritems():
      file_data = self._files[rel_path]
      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      if file_deps:
        file_data['deps'] = file_deps
249
def ParseArgs(argv):
  """Return parsed commandline arguments."""
  parser = commandline.ArgumentParser()

  parser.add_argument('-j', '--jobs', type=int,
                      default=multiprocessing.cpu_count(),
                      help='number of simultaneous jobs.')
  parser.add_argument('--portage-db', type='path', metavar='PORTAGE_DB',
                      help='parse portage DB for ebuild information')
  parser.add_argument('--json', type='path',
                      help='store information in JSON file')
  parser.add_argument('root', type='path',
                      help='path to the directory where the rootfs is mounted.')

  options = parser.parse_args(argv)
  options.Freeze()
  return options
271
272
def main(argv):
  """Main function to start the script."""
  options = ParseArgs(argv)
  cros_build_lib.Debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()
  tracker.ComputeELFFileDeps()

  if options.portage_db:
    tracker.ComputeEbuildDeps(options.portage_db)
  if options.json:
    tracker.SaveJSON(options.json)