Formatting: Format all python code with black.
This CL is probably not what you're looking for; it contains only
automated formatting. To exclude it from blame output, run
`git blame --ignore-rev <revision>` with this commit's revision.
BUG=b:233893248
TEST=CQ
Change-Id: I66591d7a738d241aed3290138c0f68065ab10a6d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/3879174
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Tested-by: Alex Klein <saklein@chromium.org>
diff --git a/scripts/dep_tracker.py b/scripts/dep_tracker.py
index ced8cb8..ba7a123 100644
--- a/scripts/dep_tracker.py
+++ b/scripts/dep_tracker.py
@@ -42,262 +42,279 @@
# chromeos-base/tty-0.0.1-r4
# chromeos-base/libchrome-271506-r5
# sys-kernel/chromeos-kernel-3_8-3.8.11-r35
-RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
+RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
RE_EBUILD_WITH_VERSION = (
- r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
+ r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$"
+)
def ParseELFWithArgs(args):
- """Wrapper to parseelf.ParseELF accepting a single arg.
+ """Wrapper to parseelf.ParseELF accepting a single arg.
- This wrapper is required to use multiprocessing.Pool.map function.
+ This wrapper is required to use multiprocessing.Pool.map function.
- Returns:
- A 2-tuple with the passed relative path and the result of ParseELF(). On
- error, when ParseELF() returns None, this function returns None.
- """
- elf = parseelf.ParseELF(*args)
- if elf is None:
- return
- return args[1], elf
+ Returns:
+ A 2-tuple with the passed relative path and the result of ParseELF(). On
+ error, when ParseELF() returns None, this function returns None.
+ """
+ elf = parseelf.ParseELF(*args)
+ if elf is None:
+ return
+ return args[1], elf
class DepTracker(object):
- """Tracks dependencies and file information in a root directory.
+ """Tracks dependencies and file information in a root directory.
- This class computes dependencies and other information related to the files
- in the root image.
- """
+ This class computes dependencies and other information related to the files
+ in the root image.
+ """
- def __init__(
- self,
- root: Union[str, os.PathLike],
- jobs: int = 1):
- # TODO(vapier): Convert this to Path.
- root = str(root)
- root_st = os.lstat(root)
- if not stat.S_ISDIR(root_st.st_mode):
- raise Exception('root (%s) must be a directory' % root)
- self._root = root.rstrip('/') + '/'
- self._file_type_decoder = filetype.FileTypeDecoder(root)
+ def __init__(self, root: Union[str, os.PathLike], jobs: int = 1):
+ # TODO(vapier): Convert this to Path.
+ root = str(root)
+ root_st = os.lstat(root)
+ if not stat.S_ISDIR(root_st.st_mode):
+ raise Exception("root (%s) must be a directory" % root)
+ self._root = root.rstrip("/") + "/"
+ self._file_type_decoder = filetype.FileTypeDecoder(root)
- # A wrapper to the multiprocess map function. We avoid launching a pool
- # of processes when jobs is 1 so python exceptions kill the main process,
- # useful for debugging.
- if jobs > 1:
- # Pool is close()d in DepTracker's destructor.
- # pylint: disable=consider-using-with
- self._pool = multiprocessing.Pool(jobs)
- self._imap = self._pool.map
- else:
- self._imap = map
+ # A wrapper to the multiprocess map function. We avoid launching a pool
+ # of processes when jobs is 1 so python exceptions kill the main process,
+ # useful for debugging.
+ if jobs > 1:
+ # Pool is close()d in DepTracker's destructor.
+ # pylint: disable=consider-using-with
+ self._pool = multiprocessing.Pool(jobs)
+ self._imap = self._pool.map
+ else:
+ self._imap = map
- self._files = {}
- self._ebuilds = {}
+ self._files = {}
+ self._ebuilds = {}
- # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
- # to point to the lowest lexicographically file with the same inode.
- self._symlinks = {}
- self._hardlinks = {}
+ # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
+ # to point to the lowest lexicographically file with the same inode.
+ self._symlinks = {}
+ self._hardlinks = {}
- def __del__(self):
- """Destructor method to free up self._pool resource."""
- self._pool.close()
+ def __del__(self):
+ """Destructor method to free up self._pool resource."""
+ self._pool.close()
- def Init(self):
- """Generates the initial list of files."""
- # First iteration over all the files in root searching for symlinks and
- # non-regular files.
- seen_inodes = {}
- for basepath, _, filenames in sorted(os.walk(self._root)):
- for filename in sorted(filenames):
- full_path = os.path.join(basepath, filename)
- rel_path = full_path[len(self._root):]
- st = os.lstat(full_path)
+ def Init(self):
+ """Generates the initial list of files."""
+ # First iteration over all the files in root searching for symlinks and
+ # non-regular files.
+ seen_inodes = {}
+ for basepath, _, filenames in sorted(os.walk(self._root)):
+ for filename in sorted(filenames):
+ full_path = os.path.join(basepath, filename)
+ rel_path = full_path[len(self._root) :]
+ st = os.lstat(full_path)
- file_data = {
- 'size': st.st_size,
+ file_data = {
+ "size": st.st_size,
+ }
+ self._files[rel_path] = file_data
+
+ # Track symlinks.
+ if stat.S_ISLNK(st.st_mode):
+ link_path = os.readlink(full_path)
+ # lddtree's normpath handles a little more cases than the os.path
+ # version. In particular, it handles the '//' case.
+ self._symlinks[rel_path] = (
+ link_path.lstrip("/")
+ if link_path and link_path[0] == "/"
+ else lddtree.normpath(
+ os.path.join(os.path.dirname(rel_path), link_path)
+ )
+ )
+ file_data["deps"] = {"symlink": [self._symlinks[rel_path]]}
+
+ # Track hardlinks.
+ if st.st_ino in seen_inodes:
+ self._hardlinks[rel_path] = seen_inodes[st.st_ino]
+ continue
+ seen_inodes[st.st_ino] = rel_path
+
+ def SaveJSON(self, filename):
+ """Save the computed information to a JSON file.
+
+ Args:
+ filename: The destination JSON file.
+ """
+ data = {
+ "files": self._files,
+ "ebuilds": self._ebuilds,
}
- self._files[rel_path] = file_data
+ json.dump(data, open(filename, "w"))
- # Track symlinks.
- if stat.S_ISLNK(st.st_mode):
- link_path = os.readlink(full_path)
- # lddtree's normpath handles a little more cases than the os.path
- # version. In particular, it handles the '//' case.
- self._symlinks[rel_path] = (
- link_path.lstrip('/') if link_path and link_path[0] == '/' else
- lddtree.normpath(os.path.join(os.path.dirname(rel_path),
- link_path)))
- file_data['deps'] = {
- 'symlink': [self._symlinks[rel_path]]
- }
+ def ComputeEbuildDeps(self, sysroot):
+ """Compute the dependencies between ebuilds and files.
- # Track hardlinks.
- if st.st_ino in seen_inodes:
- self._hardlinks[rel_path] = seen_inodes[st.st_ino]
- continue
- seen_inodes[st.st_ino] = rel_path
+ Iterates over the list of ebuilds in the database and annotates the files
+ with the ebuilds they are in. For each ebuild installing a file in the root,
+ also compute the direct dependencies. Stores the information internally.
- def SaveJSON(self, filename):
- """Save the computed information to a JSON file.
+ Args:
+ sysroot: The path to the sysroot, for example "/build/link".
+ """
+ portage_db = portage_util.PortageDB(sysroot)
+ if not os.path.exists(portage_db.db_path):
+ logging.warning(
+ "PortageDB directory not found: %s", portage_db.db_path
+ )
+ return
- Args:
- filename: The destination JSON file.
- """
- data = {
- 'files': self._files,
- 'ebuilds': self._ebuilds,
- }
- json.dump(data, open(filename, 'w'))
+ for pkg in portage_db.InstalledPackages():
+ pkg_files = []
+ pkg_size = 0
+ cpf = "%s/%s" % (pkg.category, pkg.pf)
+ for typ, rel_path in pkg.ListContents():
+ # We ignore other entries like for example "dir".
+ if not typ in (pkg.OBJ, pkg.SYM):
+ continue
+ # We ignore files installed in the SYSROOT that weren't copied to the
+ # image.
+ if not rel_path in self._files:
+ continue
+ pkg_files.append(rel_path)
+ file_data = self._files[rel_path]
+ if "ebuild" in file_data:
+ logging.warning(
+ "Duplicated entry for %s: %s and %s",
+ rel_path,
+ file_data["ebuild"],
+ cpf,
+ )
+ file_data["ebuild"] = cpf
+ pkg_size += file_data["size"]
+ # Ignore packages that don't install any file.
+ if not pkg_files:
+ continue
+ self._ebuilds[cpf] = {
+ "size": pkg_size,
+ "files": len(pkg_files),
+ "atom": "%s/%s" % (pkg.category, pkg.package),
+ "version": pkg.version,
+ }
+ # TODO(deymo): Parse dependencies between ebuilds.
- def ComputeEbuildDeps(self, sysroot):
- """Compute the dependencies between ebuilds and files.
+ def ComputeELFFileDeps(self):
+ """Computes the dependencies between files.
- Iterates over the list of ebuilds in the database and annotates the files
- with the ebuilds they are in. For each ebuild installing a file in the root,
- also compute the direct dependencies. Stores the information internally.
+ Computes the dependencies between the files in the root directory passed
+ during construction. The dependencies are inferred for ELF files.
+ The list of dependencies for each file in the passed rootfs as a dict().
+ The result's keys are the relative path of the files and the value of each
+ file is a list of dependencies. A dependency is a tuple (dep_path,
+ dep_type) where the dep_path is relative path from the passed root to the
+ dependent file and dep_type is one the following strings stating how the
+ dependency was discovered:
+ 'ldd': The dependent ELF file is listed as needed in the dynamic section.
+ 'symlink': The dependent file is a symlink to the depending.
+ If there are dependencies of a given type whose target file wasn't
+ determined, a tuple (None, dep_type) is included. This is the case for
+ example is a program uses library that wasn't found.
+ """
+ ldpaths = lddtree.LoadLdpaths(self._root)
- Args:
- sysroot: The path to the sysroot, for example "/build/link".
- """
- portage_db = portage_util.PortageDB(sysroot)
- if not os.path.exists(portage_db.db_path):
- logging.warning('PortageDB directory not found: %s', portage_db.db_path)
- return
+ # First iteration over all the files in root searching for symlinks and
+ # non-regular files.
+ parseelf_args = []
+ for rel_path, file_data in self._files.items():
+ if rel_path in self._symlinks or rel_path in self._hardlinks:
+ continue
- for pkg in portage_db.InstalledPackages():
- pkg_files = []
- pkg_size = 0
- cpf = '%s/%s' % (pkg.category, pkg.pf)
- for typ, rel_path in pkg.ListContents():
- # We ignore other entries like for example "dir".
- if not typ in (pkg.OBJ, pkg.SYM):
- continue
- # We ignore files installed in the SYSROOT that weren't copied to the
- # image.
- if not rel_path in self._files:
- continue
- pkg_files.append(rel_path)
- file_data = self._files[rel_path]
- if 'ebuild' in file_data:
- logging.warning('Duplicated entry for %s: %s and %s',
- rel_path, file_data['ebuild'], cpf)
- file_data['ebuild'] = cpf
- pkg_size += file_data['size']
- # Ignore packages that don't install any file.
- if not pkg_files:
- continue
- self._ebuilds[cpf] = {
- 'size': pkg_size,
- 'files': len(pkg_files),
- 'atom': '%s/%s' % (pkg.category, pkg.package),
- 'version': pkg.version,
- }
- # TODO(deymo): Parse dependencies between ebuilds.
+ full_path = os.path.join(self._root, rel_path)
+ st = os.lstat(full_path)
+ if not stat.S_ISREG(st.st_mode):
+ continue
+ parseelf_args.append((self._root, rel_path, ldpaths))
- def ComputeELFFileDeps(self):
- """Computes the dependencies between files.
+ # Parallelize the ELF lookup step since it is quite expensive.
+ elfs = dict(
+ x
+ for x in self._imap(ParseELFWithArgs, parseelf_args)
+ if not x is None
+ )
- Computes the dependencies between the files in the root directory passed
- during construction. The dependencies are inferred for ELF files.
- The list of dependencies for each file in the passed rootfs as a dict().
- The result's keys are the relative path of the files and the value of each
- file is a list of dependencies. A dependency is a tuple (dep_path,
- dep_type) where the dep_path is relative path from the passed root to the
- dependent file and dep_type is one the following strings stating how the
- dependency was discovered:
- 'ldd': The dependent ELF file is listed as needed in the dynamic section.
- 'symlink': The dependent file is a symlink to the depending.
- If there are dependencies of a given type whose target file wasn't
- determined, a tuple (None, dep_type) is included. This is the case for
- example is a program uses library that wasn't found.
- """
- ldpaths = lddtree.LoadLdpaths(self._root)
+ for rel_path, elf in elfs.items():
+ file_data = self._files[rel_path]
+ # Fill in the ftype if not set yet. We complete this value at this point
+ # to avoid re-parsing the ELF file later.
+ if not "ftype" in file_data:
+ ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
+ if ftype:
+ file_data["ftype"] = ftype
- # First iteration over all the files in root searching for symlinks and
- # non-regular files.
- parseelf_args = []
- for rel_path, file_data in self._files.items():
- if rel_path in self._symlinks or rel_path in self._hardlinks:
- continue
+ file_deps = file_data.get("deps", {})
+ # Dependencies based on the result of ldd.
+ for lib in elf.get("needed", []):
+ lib_path = elf["libs"][lib]["path"]
+ if not "ldd" in file_deps:
+ file_deps["ldd"] = []
+ file_deps["ldd"].append(lib_path)
- full_path = os.path.join(self._root, rel_path)
- st = os.lstat(full_path)
- if not stat.S_ISREG(st.st_mode):
- continue
- parseelf_args.append((self._root, rel_path, ldpaths))
+ if file_deps:
+ file_data["deps"] = file_deps
- # Parallelize the ELF lookup step since it is quite expensive.
- elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
- if not x is None)
-
- for rel_path, elf in elfs.items():
- file_data = self._files[rel_path]
- # Fill in the ftype if not set yet. We complete this value at this point
- # to avoid re-parsing the ELF file later.
- if not 'ftype' in file_data:
- ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
- if ftype:
- file_data['ftype'] = ftype
-
- file_deps = file_data.get('deps', {})
- # Dependencies based on the result of ldd.
- for lib in elf.get('needed', []):
- lib_path = elf['libs'][lib]['path']
- if not 'ldd' in file_deps:
- file_deps['ldd'] = []
- file_deps['ldd'].append(lib_path)
-
- if file_deps:
- file_data['deps'] = file_deps
-
- def ComputeFileTypes(self):
- """Computes all the missing file type for the files in the root."""
- for rel_path, file_data in self._files.items():
- if 'ftype' in file_data:
- continue
- ftype = self._file_type_decoder.GetType(rel_path)
- if ftype:
- file_data['ftype'] = ftype
+ def ComputeFileTypes(self):
+ """Computes all the missing file type for the files in the root."""
+ for rel_path, file_data in self._files.items():
+ if "ftype" in file_data:
+ continue
+ ftype = self._file_type_decoder.GetType(rel_path)
+ if ftype:
+ file_data["ftype"] = ftype
def ParseArgs(argv):
- """Return parsed commandline arguments."""
+ """Return parsed commandline arguments."""
- parser = commandline.ArgumentParser()
- parser.add_argument(
- '-j', '--jobs', type=int, default=multiprocessing.cpu_count(),
- help='number of simultaneous jobs.')
- parser.add_argument(
- '--sysroot', type='path', metavar='SYSROOT',
- help='parse portage DB for ebuild information from the provided sysroot.')
- parser.add_argument(
- '--json', type='path',
- help='store information in JSON file.')
+ parser = commandline.ArgumentParser()
+ parser.add_argument(
+ "-j",
+ "--jobs",
+ type=int,
+ default=multiprocessing.cpu_count(),
+ help="number of simultaneous jobs.",
+ )
+ parser.add_argument(
+ "--sysroot",
+ type="path",
+ metavar="SYSROOT",
+ help="parse portage DB for ebuild information from the provided sysroot.",
+ )
+ parser.add_argument(
+ "--json", type="path", help="store information in JSON file."
+ )
- parser.add_argument(
- 'root', type='path',
- help='path to the directory where the rootfs is mounted.')
+ parser.add_argument(
+ "root",
+ type="path",
+ help="path to the directory where the rootfs is mounted.",
+ )
- opts = parser.parse_args(argv)
- opts.Freeze()
- return opts
+ opts = parser.parse_args(argv)
+ opts.Freeze()
+ return opts
def main(argv):
- """Main function to start the script."""
- opts = ParseArgs(argv)
- logging.debug('Options are %s', opts)
+ """Main function to start the script."""
+ opts = ParseArgs(argv)
+ logging.debug("Options are %s", opts)
- dt = DepTracker(opts.root, jobs=opts.jobs)
- dt.Init()
+ dt = DepTracker(opts.root, jobs=opts.jobs)
+ dt.Init()
- dt.ComputeELFFileDeps()
- dt.ComputeFileTypes()
+ dt.ComputeELFFileDeps()
+ dt.ComputeFileTypes()
- if opts.sysroot:
- dt.ComputeEbuildDeps(opts.sysroot)
+ if opts.sysroot:
+ dt.ComputeEbuildDeps(opts.sysroot)
- if opts.json:
- dt.SaveJSON(opts.json)
+ if opts.json:
+ dt.SaveJSON(opts.json)