# Copyright 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to discover dependencies and other file information from a build.

Some files in the image are installed to provide some functionality, such as
chrome, shill or bluetoothd, which can be present or not on a given build.
Many other files are dependencies of these files that need to be present in
the image for them to work. These dependencies come from needed shared
libraries, executed files and other configuration files read.

This script currently discovers dependencies between ELF files for libraries
required at load time (libraries loaded by the dynamic linker) but not
libraries loaded at runtime with dlopen(). It also computes size and file type
in several cases to help understand the contents of the built image.
"""
19
20import itertools
21import json
22import multiprocessing
23import os
24import re
25import stat
26
27from chromite.lib import commandline
28from chromite.lib import cros_build_lib
Alex Deymo365b10c2014-08-25 13:14:28 -070029from chromite.lib import filetype
Alex Deymo3cfb9cd2014-08-18 15:56:35 -070030from chromite.lib import parseelf
31from chromite.scripts import lddtree
32
33
# Regexes to parse Gentoo atoms, splitting the package name from the version.
# They should match the following ebuild names:
# without version:
#   chromeos-base/tty
#   chromeos-base/libchrome-271506
#   sys-kernel/chromeos-kernel-3_8
# with version (optionally prefixed with '='; the trailing '-rN' revision is
# part of the version group):
#   chromeos-base/tty-0.0.1-r4
#   chromeos-base/libchrome-271506-r5
#   sys-kernel/chromeos-kernel-3_8-3.8.11-r35
RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
RE_EBUILD_WITH_VERSION = (
    r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
47
48
def ParseELFWithArgs(args):
  """Single-argument adapter around parseelf.ParseELF.

  multiprocessing.Pool.map only passes one argument to the mapped function,
  so the ParseELF arguments are packed in a single tuple.

  Args:
    args: Tuple of positional arguments for parseelf.ParseELF(); args[1] is
        the relative path of the file being parsed.

  Returns:
    A 2-tuple (rel_path, parsed_elf), or None when ParseELF() returns None.
  """
  parsed = parseelf.ParseELF(*args)
  return None if parsed is None else (args[1], parsed)
62
63
class DepTracker(object):
  """Tracks dependencies and file information in a root directory.

  This class computes dependencies and other information related to the files
  in the root image.
  """

  def __init__(self, root, jobs=1):
    """Initializes the tracker over the given rootfs directory.

    Args:
      root: Path to the directory holding the image files.
      jobs: Number of parallel processes used for the expensive steps.

    Raises:
      Exception: If |root| is not a directory.
    """
    root_st = os.lstat(root)
    if not stat.S_ISDIR(root_st.st_mode):
      raise Exception('root (%s) must be a directory' % root)
    self._root = root.rstrip('/') + '/'
    self._file_type_decoder = filetype.FileTypeDecoder(root)

    # A wrapper to the multiprocess map function. We avoid launching a pool
    # of processes when jobs is 1 so python exceptions kill the main process,
    # useful for debugging.
    if jobs > 1:
      self._pool = multiprocessing.Pool(jobs)
      self._imap = self._pool.map
    else:
      self._imap = itertools.imap

    # Maps rel_path -> dict of computed file information ('size', 'deps',
    # 'ftype', 'ebuild', ...).
    self._files = {}
    # Maps package name -> dict of computed ebuild information.
    self._ebuilds = {}

    # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
    # to point to the lowest lexicographically file with the same inode.
    self._symlinks = {}
    self._hardlinks = {}

  def Init(self):
    """Generates the initial list of files."""
    # First iteration over all the files in root searching for symlinks and
    # non-regular files.
    seen_inodes = {}
    for basepath, _, filenames in sorted(os.walk(self._root)):
      for filename in sorted(filenames):
        full_path = os.path.join(basepath, filename)
        rel_path = full_path[len(self._root):]
        st = os.lstat(full_path)

        file_data = {
            'size': st.st_size,
        }
        self._files[rel_path] = file_data

        # Track symlinks.
        if stat.S_ISLNK(st.st_mode):
          link_path = os.readlink(full_path)
          # lddtree's normpath handles a little more cases than the os.path
          # version. In particular, it handles the '//' case.
          self._symlinks[rel_path] = (
              link_path.lstrip('/') if link_path and link_path[0] == '/' else
              lddtree.normpath(os.path.join(os.path.dirname(rel_path),
                                            link_path)))
          file_data['deps'] = {
              'symlink': [self._symlinks[rel_path]]
          }

        # Track hardlinks: every rel_path whose inode was already seen maps
        # to the first (lowest sorted) path with that inode.
        if st.st_ino in seen_inodes:
          self._hardlinks[rel_path] = seen_inodes[st.st_ino]
          continue
        seen_inodes[st.st_ino] = rel_path

  def SaveJSON(self, filename):
    """Save the computed information to a JSON file.

    Args:
      filename: The destination JSON file.
    """
    data = {
        'files': self._files,
        'ebuilds': self._ebuilds,
    }
    # Use a context manager so the file handle is flushed and closed even if
    # json.dump() raises (the old code leaked the handle).
    with open(filename, 'w') as out_file:
      json.dump(data, out_file)

  def ComputeEbuildDeps(self, portage_db):
    """Compute the dependencies between ebuilds and files.

    Iterates over the list of ebuilds in the database and annotates the files
    with the ebuilds they are in. For each ebuild installing a file in the
    root, also compute the direct dependencies. Stores the information
    internally.

    Args:
      portage_db: The path to the portage db. Usually "/var/db/pkg".
    """
    portage_db = portage_db.rstrip('/') + '/'
    for basepath, _, filenames in sorted(os.walk(portage_db)):
      if 'CONTENTS' not in filenames:
        continue
      full_path = os.path.join(basepath, 'CONTENTS')
      pkg = basepath[len(portage_db):]
      pkg_files = []
      pkg_size = 0
      # Close the CONTENTS file deterministically (the old code relied on
      # the garbage collector to do it).
      with open(full_path) as contents:
        for line in contents:
          line = line.split()
          # Line format is: "type file_path [more space-separated fields]".
          # Discard any other line without at least the first two fields. The
          # remaining fields depend on the type.
          if len(line) < 2:
            continue
          typ, file_path = line[:2]
          # We ignore other entries like for example "dir".
          if typ not in ('obj', 'sym'):
            continue
          file_path = file_path.lstrip('/')
          # We ignore files installed in the SYSROOT that weren't copied to
          # the image.
          if file_path not in self._files:
            continue
          pkg_files.append(file_path)
          file_data = self._files[file_path]
          if 'ebuild' in file_data:
            # Note: the last conversion used to be a bare '%', which raises
            # ValueError once the message is actually formatted.
            cros_build_lib.Warning('Duplicated entry for %s: %s and %s',
                                   file_path, file_data['ebuild'], pkg)
          file_data['ebuild'] = pkg
          pkg_size += file_data['size']
      if pkg_files:
        pkg_atom = pkg
        pkg_version = None
        m = re.match(RE_EBUILD_WITHOUT_VERSION, pkg)
        if m:
          pkg_atom = m.group(1)
        m = re.match(RE_EBUILD_WITH_VERSION, pkg)
        if m:
          pkg_atom = m.group(1)
          pkg_version = m.group(2)
        self._ebuilds[pkg] = {
            'size': pkg_size,
            'files': len(pkg_files),
            'atom': pkg_atom,
            'version': pkg_version,
        }
    # TODO(deymo): Parse dependencies between ebuilds.

  def ComputeELFFileDeps(self):
    """Computes the dependencies between files.

    Computes the dependencies between the files in the root directory passed
    during construction. The dependencies are inferred for ELF files.
    The list of dependencies for each file in the passed rootfs as a dict().
    The result's keys are the relative path of the files and the value of each
    file is a list of dependencies. A dependency is a tuple (dep_path,
    dep_type) where the dep_path is relative path from the passed root to the
    dependent file and dep_type is one the following strings stating how the
    dependency was discovered:
      'ldd': The dependent ELF file is listed as needed in the dynamic section.
      'symlink': The dependent file is a symlink to the depending.
    If there are dependencies of a given type whose target file wasn't
    determined, a tuple (None, dep_type) is included. This is the case for
    example if a program uses a library that wasn't found.
    """
    ldpaths = lddtree.LoadLdpaths(self._root)

    # Collect the list of regular files to parse, skipping symlinks and
    # hardlinks already tracked during Init().
    parseelf_args = []
    for rel_path, file_data in self._files.iteritems():
      if rel_path in self._symlinks or rel_path in self._hardlinks:
        continue

      full_path = os.path.join(self._root, rel_path)
      st = os.lstat(full_path)
      if not stat.S_ISREG(st.st_mode):
        continue
      parseelf_args.append((self._root, rel_path, ldpaths))

    # Parallelize the ELF lookup step since it is quite expensive.
    elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
                if x is not None)

    for rel_path, elf in elfs.iteritems():
      file_data = self._files[rel_path]
      # Fill in the ftype if not set yet. We complete this value at this point
      # to avoid re-parsing the ELF file later.
      if 'ftype' not in file_data:
        ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
        if ftype:
          file_data['ftype'] = ftype

      file_deps = file_data.get('deps', {})
      # Dependencies based on the result of ldd.
      for lib in elf.get('needed', []):
        lib_path = elf['libs'][lib]['path']
        file_deps.setdefault('ldd', []).append(lib_path)

      if file_deps:
        file_data['deps'] = file_deps

  def ComputeFileTypes(self):
    """Computes all the missing file type for the files in the root."""
    for rel_path, file_data in self._files.iteritems():
      if 'ftype' in file_data:
        continue
      ftype = self._file_type_decoder.GetType(rel_path)
      if ftype:
        file_data['ftype'] = ftype
265
Alex Deymo3cfb9cd2014-08-18 15:56:35 -0700266
def ParseArgs(argv):
  """Parse and validate the command line arguments.

  Args:
    argv: List of command line arguments (without the program name).

  Returns:
    The frozen options object produced by the commandline parser.
  """
  parser = commandline.ArgumentParser()
  parser.add_argument('-j', '--jobs', type=int,
                      default=multiprocessing.cpu_count(),
                      help='number of simultaneous jobs.')
  parser.add_argument('--portage-db', type='path', metavar='PORTAGE_DB',
                      help='parse portage DB for ebuild information')
  parser.add_argument('--json', type='path',
                      help='store information in JSON file')
  parser.add_argument('root', type='path',
                      help='path to the directory where the rootfs is '
                           'mounted.')

  options = parser.parse_args(argv)
  options.Freeze()
  return options
288
289
def main(argv):
  """Entry point: compute and optionally export the build file information."""
  options = ParseArgs(argv)
  cros_build_lib.Debug('Options are %s', options)

  tracker = DepTracker(options.root, jobs=options.jobs)
  tracker.Init()

  # ELF dependencies and file types are always computed.
  tracker.ComputeELFFileDeps()
  tracker.ComputeFileTypes()

  # Ebuild annotations and the JSON dump are opt-in.
  if options.portage_db:
    tracker.ComputeEbuildDeps(options.portage_db)

  if options.json:
    tracker.SaveJSON(options.json)