# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""\
Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide some functionality: for
example, chrome, shill or bluetoothd each provide functionality that may or
may not be present on a given build. Many other files are dependencies of
these files and need to be present in the image for them to work. These
dependencies come from needed shared libraries, executed files and other
configuration files that are read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""

import itertools
import json
import multiprocessing
import os
import re
import stat

from chromite.lib import commandline
from chromite.lib import cros_build_lib
from chromite.lib import filetype
from chromite.lib import parseelf
from chromite.scripts import lddtree


# Regex to parse Gentoo atoms. This should match the following ebuild names,
# splitting the package name from the version.
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version:
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')


def ParseELFWithArgs(args):
  """Wrapper to parseelf.ParseELF accepting a single arg.

  This wrapper is required to use the multiprocessing.Pool.map function.

  Returns:
    A 2-tuple with the passed relative path and the result of ParseELF(). On
    error, when ParseELF() returns None, this function returns None.
  """
  elf = parseelf.ParseELF(*args)
  if elf is None:
    return
  return args[1], elf


class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.
  """

  def __init__(self, root, jobs=1):
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper for the multiprocessing map function. We avoid launching a
    # pool of processes when jobs is 1 so that Python exceptions kill the main
    # process, which is useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = itertools.imap

    self._files = {}
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lexicographically lowest file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files."""
    # First iteration over all the files in root searching for symlinks and
    # non-regular files.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          # lddtree's normpath handles a few more cases than the os.path
          # version. In particular, it handles the '//' case.
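          # For example (illustrative paths): a relative target such as
          # 'python2.7' for the symlink 'usr/bin/python' resolves to
          # 'usr/bin/python2.7', while an absolute target like
          # '/lib/libfoo.so.1' is stored with the leading '/' stripped, as
          # 'lib/libfoo.so.1'.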
          self._symlinks[rel_path] = (
              link_path.lstrip('/') if link_path and link_path[0] == '/' else
              lddtree.normpath(os.path.join(os.path.dirname(rel_path),
                                            link_path)))
          file_data['deps'] = {
              'symlink': [self._symlinks[rel_path]]
          }

        # Track hardlinks.
        if st.st_ino in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[st.st_ino]
          continue
        seen_inodes[st.st_ino] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
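    # The JSON layout mirrors the internal dicts. An illustrative (not real)
    # document would be:
    #   {"files": {"usr/bin/foo": {"size": 1234, "ebuild": "cat/foo-0.1"}},
    #    "ebuilds": {"cat/foo-0.1": {"size": 1234, "files": 1,
    #                                "atom": "cat/foo", "version": "0.1"}}}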
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    with open(filename, 'w') as json_file:
      json.dump(data, json_file)

  def ComputeEbuildDeps(self, portage_db):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, it also computes the direct dependencies. Stores the information
    internally.

    Args:
      portage_db: The path to the portage db. Usually "/var/db/pkg".
    """
    portage_db = portage_db.rstrip('/') + '/'
    for basepath, _, filenames in sorted(os.walk(portage_db)):
      if 'CONTENTS' in filenames:
        full_path = os.path.join(basepath, 'CONTENTS')
        pkg = basepath[len(portage_db):]
        pkg_files = []
        pkg_size = 0
        for line in open(full_path):
          line = line.split()
          # Line format is: "type file_path [more space-separated fields]".
          # Discard any line without at least the first two fields. The
          # remaining fields depend on the type.
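          # For reference, typical CONTENTS lines look like the following
          # (paths, hashes and timestamps are illustrative):
          #   dir /usr/bin
          #   obj /usr/bin/foo 3b5d5c3712955042212316173ccf37be 1407910800
          #   sym /usr/lib/libfoo.so -> libfoo.so.1 1407910800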
          if len(line) < 2:
            continue
          typ, file_path = line[:2]
          # We ignore other entries, for example "dir".
          if not typ in ('obj', 'sym'):
            continue
          file_path = file_path.lstrip('/')
          # We ignore files installed in the SYSROOT that weren't copied to the
          # image.
          if not file_path in self._files:
            continue
          pkg_files.append(file_path)
          file_data = self._files[file_path]
          if 'ebuild' in file_data:
            cros_build_lib.Warning('Duplicated entry for %s: %s and %s',
                                   file_path, file_data['ebuild'], pkg)
          file_data['ebuild'] = pkg
          pkg_size += file_data['size']
        if pkg_files:
          pkg_atom = pkg
          pkg_version = None
          m = re.match(RE_EBUILD_WITHOUT_VERSION, pkg)
          if m:
            pkg_atom = m.group(1)
          m = re.match(RE_EBUILD_WITH_VERSION, pkg)
          if m:
            pkg_atom = m.group(1)
            pkg_version = m.group(2)
          self._ebuilds[pkg] = {
              'size': pkg_size,
              'files': len(pkg_files),
              'atom': pkg_atom,
              'version': pkg_version,
          }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files.
    The list of dependencies for each file in the passed rootfs is stored as a
    dict(). The result's keys are the relative paths of the files and the
    value for each file is a list of dependencies. A dependency is a tuple
    (dep_path, dep_type) where dep_path is the relative path from the passed
    root to the dependent file and dep_type is one of the following strings
    stating how the dependency was discovered:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The dependent file is a symlink to the depending file.
    If there are dependencies of a given type whose target file wasn't
    determined, a tuple (None, dep_type) is included. This is the case, for
    example, when a program uses a library that wasn't found.
    """
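    # Illustrative example (paths and sizes are hypothetical): after this call
    # an entry in self._files may look like:
    #   'usr/bin/foo': {
    #       'size': 12345,
    #       'ftype': ...,  # as reported by filetype.FileTypeDecoder
    #       'deps': {'ldd': ['lib/libc.so.6']},
    #   }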
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Iterate over all the files in the root skipping symlinks, hardlinks and
    # non-regular files, and collect the arguments for the parallel ELF
    # parsing step.
    parseelf_args = []
    for rel_path, file_data in self._files.iteritems():
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

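    # Each value in elfs is the dict returned by parseelf.ParseELF(); the loop
    # below relies only on two of its keys: 'needed' (the list of libraries
    # requested in the dynamic section) and 'libs' (a mapping from library
    # name to information that includes its resolved 'path').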
    for rel_path, elf in elfs.iteritems():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if not 'ftype' in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        if not 'ldd' in file_deps:
          file_deps['ldd'] = []
        file_deps['ldd'].append(lib_path)

      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file types for the files in the root."""
    for rel_path, file_data in self._files.iteritems():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype


def ParseArgs(argv):
  """Return parsed commandline arguments."""

  parser = commandline.ArgumentParser()
  parser.add_argument(
      '-j', '--jobs', type=int, default=multiprocessing.cpu_count(),
      help='number of simultaneous jobs.')
  parser.add_argument(
      '--portage-db', type='path', metavar='PORTAGE_DB',
      help='parse portage DB for ebuild information')
  parser.add_argument(
      '--json', type='path',
      help='store information in JSON file')

  parser.add_argument(
      'root', type='path',
      help='path to the directory where the rootfs is mounted.')

  opts = parser.parse_args(argv)
  opts.Freeze()
  return opts


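# Example invocation (illustrative; the board name, paths and output file are
# assumptions, and the exact entry point depends on how chromite exposes this
# script):
#   dep_tracker --portage-db=/build/${BOARD}/var/db/pkg \
#       --json=deps.json /build/${BOARD}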
def main(argv):
  """Main function to start the script."""
  opts = ParseArgs(argv)
  cros_build_lib.Debug('Options are %s', opts)

  dt = DepTracker(opts.root, jobs=opts.jobs)
  dt.Init()

  dt.ComputeELFFileDeps()
  dt.ComputeFileTypes()

  if opts.portage_db:
    dt.ComputeEbuildDeps(opts.portage_db)

  if opts.json:
    dt.SaveJSON(opts.json)