Formatting: Format all python code with black.
This CL is probably not what you're looking for; it contains only
automated formatting. To exclude it from blame output, run
`git blame --ignore-rev <revision>` with this commit's revision.
BUG=b:233893248
TEST=CQ
Change-Id: I66591d7a738d241aed3290138c0f68065ab10a6d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/chromite/+/3879174
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Tested-by: Alex Klein <saklein@chromium.org>
diff --git a/scripts/dep_tracker.py b/scripts/dep_tracker.py
index ced8cb8..ba7a123 100644
--- a/scripts/dep_tracker.py
+++ b/scripts/dep_tracker.py
@@ -42,262 +42,279 @@
# chromeos-base/tty-0.0.1-r4
# chromeos-base/libchrome-271506-r5
# sys-kernel/chromeos-kernel-3_8-3.8.11-r35
-RE_EBUILD_WITHOUT_VERSION = r'^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$'
+RE_EBUILD_WITHOUT_VERSION = r"^([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)$"
RE_EBUILD_WITH_VERSION = (
- r'^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$')
+ r"^=?([a-z0-9\-]+/[a-zA-Z0-9\_\+\-]+)\-([^\-]+(\-r\d+)?)$"
+)
def ParseELFWithArgs(args):
- """Wrapper to parseelf.ParseELF accepting a single arg.
+ """Wrapper to parseelf.ParseELF accepting a single arg.
- This wrapper is required to use multiprocessing.Pool.map function.
+ This wrapper is required to use multiprocessing.Pool.map function.
- Returns:
- A 2-tuple with the passed relative path and the result of ParseELF(). On
- error, when ParseELF() returns None, this function returns None.
- """
- elf = parseelf.ParseELF(*args)
- if elf is None:
- return
- return args[1], elf
+ Returns:
+ A 2-tuple with the passed relative path and the result of ParseELF(). On
+ error, when ParseELF() returns None, this function returns None.
+ """
+ elf = parseelf.ParseELF(*args)
+ if elf is None:
+ return
+ return args[1], elf
class DepTracker(object):
- """Tracks dependencies and file information in a root directory.
+ """Tracks dependencies and file information in a root directory.
- This class computes dependencies and other information related to the files
- in the root image.
- """
+ This class computes dependencies and other information related to the files
+ in the root image.
+ """
- def __init__(
- self,
- root: Union[str, os.PathLike],
- jobs: int = 1):
- # TODO(vapier): Convert this to Path.
- root = str(root)
- root_st = os.lstat(root)
- if not stat.S_ISDIR(root_st.st_mode):
- raise Exception('root (%s) must be a directory' % root)
- self._root = root.rstrip('/') + '/'
- self._file_type_decoder = filetype.FileTypeDecoder(root)
+ def __init__(self, root: Union[str, os.PathLike], jobs: int = 1):
+ # TODO(vapier): Convert this to Path.
+ root = str(root)
+ root_st = os.lstat(root)
+ if not stat.S_ISDIR(root_st.st_mode):
+ raise Exception("root (%s) must be a directory" % root)
+ self._root = root.rstrip("/") + "/"
+ self._file_type_decoder = filetype.FileTypeDecoder(root)
- # A wrapper to the multiprocess map function. We avoid launching a pool
- # of processes when jobs is 1 so python exceptions kill the main process,
- # useful for debugging.
- if jobs > 1:
- # Pool is close()d in DepTracker's destructor.
- # pylint: disable=consider-using-with
- self._pool = multiprocessing.Pool(jobs)
- self._imap = self._pool.map
- else:
- self._imap = map
+ # A wrapper to the multiprocess map function. We avoid launching a pool
+ # of processes when jobs is 1 so python exceptions kill the main process,
+ # useful for debugging.
+ if jobs > 1:
+ # Pool is close()d in DepTracker's destructor.
+ # pylint: disable=consider-using-with
+ self._pool = multiprocessing.Pool(jobs)
+ self._imap = self._pool.map
+ else:
+ self._imap = map
- self._files = {}
- self._ebuilds = {}
+ self._files = {}
+ self._ebuilds = {}
- # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
- # to point to the lowest lexicographically file with the same inode.
- self._symlinks = {}
- self._hardlinks = {}
+ # Mapping of rel_paths for symlinks and hardlinks. Hardlinks are assumed
+ # to point to the lowest lexicographically file with the same inode.
+ self._symlinks = {}
+ self._hardlinks = {}
- def __del__(self):
- """Destructor method to free up self._pool resource."""
- self._pool.close()
+ def __del__(self):
+ """Destructor method to free up self._pool resource."""
+ self._pool.close()
- def Init(self):
- """Generates the initial list of files."""
- # First iteration over all the files in root searching for symlinks and
- # non-regular files.
- seen_inodes = {}
- for basepath, _, filenames in sorted(os.walk(self._root)):
- for filename in sorted(filenames):
- full_path = os.path.join(basepath, filename)
- rel_path = full_path[len(self._root):]
- st = os.lstat(full_path)
+ def Init(self):
+ """Generates the initial list of files."""
+ # First iteration over all the files in root searching for symlinks and
+ # non-regular files.
+ seen_inodes = {}
+ for basepath, _, filenames in sorted(os.walk(self._root)):
+ for filename in sorted(filenames):
+ full_path = os.path.join(basepath, filename)
+ rel_path = full_path[len(self._root) :]
+ st = os.lstat(full_path)
- file_data = {
- 'size': st.st_size,
+ file_data = {
+ "size": st.st_size,
+ }
+ self._files[rel_path] = file_data
+
+ # Track symlinks.
+ if stat.S_ISLNK(st.st_mode):
+ link_path = os.readlink(full_path)
+ # lddtree's normpath handles a little more cases than the os.path
+ # version. In particular, it handles the '//' case.
+ self._symlinks[rel_path] = (
+ link_path.lstrip("/")
+ if link_path and link_path[0] == "/"
+ else lddtree.normpath(
+ os.path.join(os.path.dirname(rel_path), link_path)
+ )
+ )
+ file_data["deps"] = {"symlink": [self._symlinks[rel_path]]}
+
+ # Track hardlinks.
+ if st.st_ino in seen_inodes:
+ self._hardlinks[rel_path] = seen_inodes[st.st_ino]
+ continue
+ seen_inodes[st.st_ino] = rel_path
+
+ def SaveJSON(self, filename):
+ """Save the computed information to a JSON file.
+
+ Args:
+ filename: The destination JSON file.
+ """
+ data = {
+ "files": self._files,
+ "ebuilds": self._ebuilds,
}
- self._files[rel_path] = file_data
+ json.dump(data, open(filename, "w"))
- # Track symlinks.
- if stat.S_ISLNK(st.st_mode):
- link_path = os.readlink(full_path)
- # lddtree's normpath handles a little more cases than the os.path
- # version. In particular, it handles the '//' case.
- self._symlinks[rel_path] = (
- link_path.lstrip('/') if link_path and link_path[0] == '/' else
- lddtree.normpath(os.path.join(os.path.dirname(rel_path),
- link_path)))
- file_data['deps'] = {
- 'symlink': [self._symlinks[rel_path]]
- }
+ def ComputeEbuildDeps(self, sysroot):
+ """Compute the dependencies between ebuilds and files.
- # Track hardlinks.
- if st.st_ino in seen_inodes:
- self._hardlinks[rel_path] = seen_inodes[st.st_ino]
- continue
- seen_inodes[st.st_ino] = rel_path
+ Iterates over the list of ebuilds in the database and annotates the files
+ with the ebuilds they are in. For each ebuild installing a file in the root,
+ also compute the direct dependencies. Stores the information internally.
- def SaveJSON(self, filename):
- """Save the computed information to a JSON file.
+ Args:
+ sysroot: The path to the sysroot, for example "/build/link".
+ """
+ portage_db = portage_util.PortageDB(sysroot)
+ if not os.path.exists(portage_db.db_path):
+ logging.warning(
+ "PortageDB directory not found: %s", portage_db.db_path
+ )
+ return
- Args:
- filename: The destination JSON file.
- """
- data = {
- 'files': self._files,
- 'ebuilds': self._ebuilds,
- }
- json.dump(data, open(filename, 'w'))
+ for pkg in portage_db.InstalledPackages():
+ pkg_files = []
+ pkg_size = 0
+ cpf = "%s/%s" % (pkg.category, pkg.pf)
+ for typ, rel_path in pkg.ListContents():
+ # We ignore other entries like for example "dir".
+ if not typ in (pkg.OBJ, pkg.SYM):
+ continue
+ # We ignore files installed in the SYSROOT that weren't copied to the
+ # image.
+ if not rel_path in self._files:
+ continue
+ pkg_files.append(rel_path)
+ file_data = self._files[rel_path]
+ if "ebuild" in file_data:
+ logging.warning(
+ "Duplicated entry for %s: %s and %s",
+ rel_path,
+ file_data["ebuild"],
+ cpf,
+ )
+ file_data["ebuild"] = cpf
+ pkg_size += file_data["size"]
+ # Ignore packages that don't install any file.
+ if not pkg_files:
+ continue
+ self._ebuilds[cpf] = {
+ "size": pkg_size,
+ "files": len(pkg_files),
+ "atom": "%s/%s" % (pkg.category, pkg.package),
+ "version": pkg.version,
+ }
+ # TODO(deymo): Parse dependencies between ebuilds.
- def ComputeEbuildDeps(self, sysroot):
- """Compute the dependencies between ebuilds and files.
+ def ComputeELFFileDeps(self):
+ """Computes the dependencies between files.
- Iterates over the list of ebuilds in the database and annotates the files
- with the ebuilds they are in. For each ebuild installing a file in the root,
- also compute the direct dependencies. Stores the information internally.
+ Computes the dependencies between the files in the root directory passed
+ during construction. The dependencies are inferred for ELF files.
+ The list of dependencies for each file in the passed rootfs as a dict().
+ The result's keys are the relative path of the files and the value of each
+ file is a list of dependencies. A dependency is a tuple (dep_path,
+ dep_type) where the dep_path is relative path from the passed root to the
+ dependent file and dep_type is one the following strings stating how the
+ dependency was discovered:
+ 'ldd': The dependent ELF file is listed as needed in the dynamic section.
+ 'symlink': The dependent file is a symlink to the depending.
+ If there are dependencies of a given type whose target file wasn't
+ determined, a tuple (None, dep_type) is included. This is the case for
+ example is a program uses library that wasn't found.
+ """
+ ldpaths = lddtree.LoadLdpaths(self._root)
- Args:
- sysroot: The path to the sysroot, for example "/build/link".
- """
- portage_db = portage_util.PortageDB(sysroot)
- if not os.path.exists(portage_db.db_path):
- logging.warning('PortageDB directory not found: %s', portage_db.db_path)
- return
+ # First iteration over all the files in root searching for symlinks and
+ # non-regular files.
+ parseelf_args = []
+ for rel_path, file_data in self._files.items():
+ if rel_path in self._symlinks or rel_path in self._hardlinks:
+ continue
- for pkg in portage_db.InstalledPackages():
- pkg_files = []
- pkg_size = 0
- cpf = '%s/%s' % (pkg.category, pkg.pf)
- for typ, rel_path in pkg.ListContents():
- # We ignore other entries like for example "dir".
- if not typ in (pkg.OBJ, pkg.SYM):
- continue
- # We ignore files installed in the SYSROOT that weren't copied to the
- # image.
- if not rel_path in self._files:
- continue
- pkg_files.append(rel_path)
- file_data = self._files[rel_path]
- if 'ebuild' in file_data:
- logging.warning('Duplicated entry for %s: %s and %s',
- rel_path, file_data['ebuild'], cpf)
- file_data['ebuild'] = cpf
- pkg_size += file_data['size']
- # Ignore packages that don't install any file.
- if not pkg_files:
- continue
- self._ebuilds[cpf] = {
- 'size': pkg_size,
- 'files': len(pkg_files),
- 'atom': '%s/%s' % (pkg.category, pkg.package),
- 'version': pkg.version,
- }
- # TODO(deymo): Parse dependencies between ebuilds.
+ full_path = os.path.join(self._root, rel_path)
+ st = os.lstat(full_path)
+ if not stat.S_ISREG(st.st_mode):
+ continue
+ parseelf_args.append((self._root, rel_path, ldpaths))
- def ComputeELFFileDeps(self):
- """Computes the dependencies between files.
+ # Parallelize the ELF lookup step since it is quite expensive.
+ elfs = dict(
+ x
+ for x in self._imap(ParseELFWithArgs, parseelf_args)
+ if not x is None
+ )
- Computes the dependencies between the files in the root directory passed
- during construction. The dependencies are inferred for ELF files.
- The list of dependencies for each file in the passed rootfs as a dict().
- The result's keys are the relative path of the files and the value of each
- file is a list of dependencies. A dependency is a tuple (dep_path,
- dep_type) where the dep_path is relative path from the passed root to the
- dependent file and dep_type is one the following strings stating how the
- dependency was discovered:
- 'ldd': The dependent ELF file is listed as needed in the dynamic section.
- 'symlink': The dependent file is a symlink to the depending.
- If there are dependencies of a given type whose target file wasn't
- determined, a tuple (None, dep_type) is included. This is the case for
- example is a program uses library that wasn't found.
- """
- ldpaths = lddtree.LoadLdpaths(self._root)
+ for rel_path, elf in elfs.items():
+ file_data = self._files[rel_path]
+ # Fill in the ftype if not set yet. We complete this value at this point
+ # to avoid re-parsing the ELF file later.
+ if not "ftype" in file_data:
+ ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
+ if ftype:
+ file_data["ftype"] = ftype
- # First iteration over all the files in root searching for symlinks and
- # non-regular files.
- parseelf_args = []
- for rel_path, file_data in self._files.items():
- if rel_path in self._symlinks or rel_path in self._hardlinks:
- continue
+ file_deps = file_data.get("deps", {})
+ # Dependencies based on the result of ldd.
+ for lib in elf.get("needed", []):
+ lib_path = elf["libs"][lib]["path"]
+ if not "ldd" in file_deps:
+ file_deps["ldd"] = []
+ file_deps["ldd"].append(lib_path)
- full_path = os.path.join(self._root, rel_path)
- st = os.lstat(full_path)
- if not stat.S_ISREG(st.st_mode):
- continue
- parseelf_args.append((self._root, rel_path, ldpaths))
+ if file_deps:
+ file_data["deps"] = file_deps
- # Parallelize the ELF lookup step since it is quite expensive.
- elfs = dict(x for x in self._imap(ParseELFWithArgs, parseelf_args)
- if not x is None)
-
- for rel_path, elf in elfs.items():
- file_data = self._files[rel_path]
- # Fill in the ftype if not set yet. We complete this value at this point
- # to avoid re-parsing the ELF file later.
- if not 'ftype' in file_data:
- ftype = self._file_type_decoder.GetType(rel_path, elf=elf)
- if ftype:
- file_data['ftype'] = ftype
-
- file_deps = file_data.get('deps', {})
- # Dependencies based on the result of ldd.
- for lib in elf.get('needed', []):
- lib_path = elf['libs'][lib]['path']
- if not 'ldd' in file_deps:
- file_deps['ldd'] = []
- file_deps['ldd'].append(lib_path)
-
- if file_deps:
- file_data['deps'] = file_deps
-
- def ComputeFileTypes(self):
- """Computes all the missing file type for the files in the root."""
- for rel_path, file_data in self._files.items():
- if 'ftype' in file_data:
- continue
- ftype = self._file_type_decoder.GetType(rel_path)
- if ftype:
- file_data['ftype'] = ftype
+ def ComputeFileTypes(self):
+ """Computes all the missing file type for the files in the root."""
+ for rel_path, file_data in self._files.items():
+ if "ftype" in file_data:
+ continue
+ ftype = self._file_type_decoder.GetType(rel_path)
+ if ftype:
+ file_data["ftype"] = ftype
def ParseArgs(argv):
- """Return parsed commandline arguments."""
+ """Return parsed commandline arguments."""
- parser = commandline.ArgumentParser()
- parser.add_argument(
- '-j', '--jobs', type=int, default=multiprocessing.cpu_count(),
- help='number of simultaneous jobs.')
- parser.add_argument(
- '--sysroot', type='path', metavar='SYSROOT',
- help='parse portage DB for ebuild information from the provided sysroot.')
- parser.add_argument(
- '--json', type='path',
- help='store information in JSON file.')
+ parser = commandline.ArgumentParser()
+ parser.add_argument(
+ "-j",
+ "--jobs",
+ type=int,
+ default=multiprocessing.cpu_count(),
+ help="number of simultaneous jobs.",
+ )
+ parser.add_argument(
+ "--sysroot",
+ type="path",
+ metavar="SYSROOT",
+ help="parse portage DB for ebuild information from the provided sysroot.",
+ )
+ parser.add_argument(
+ "--json", type="path", help="store information in JSON file."
+ )
- parser.add_argument(
- 'root', type='path',
- help='path to the directory where the rootfs is mounted.')
+ parser.add_argument(
+ "root",
+ type="path",
+ help="path to the directory where the rootfs is mounted.",
+ )
- opts = parser.parse_args(argv)
- opts.Freeze()
- return opts
+ opts = parser.parse_args(argv)
+ opts.Freeze()
+ return opts
def main(argv):
- """Main function to start the script."""
- opts = ParseArgs(argv)
- logging.debug('Options are %s', opts)
+ """Main function to start the script."""
+ opts = ParseArgs(argv)
+ logging.debug("Options are %s", opts)
- dt = DepTracker(opts.root, jobs=opts.jobs)
- dt.Init()
+ dt = DepTracker(opts.root, jobs=opts.jobs)
+ dt.Init()
- dt.ComputeELFFileDeps()
- dt.ComputeFileTypes()
+ dt.ComputeELFFileDeps()
+ dt.ComputeFileTypes()
- if opts.sysroot:
- dt.ComputeEbuildDeps(opts.sysroot)
+ if opts.sysroot:
+ dt.ComputeEbuildDeps(opts.sysroot)
- if opts.json:
- dt.SaveJSON(opts.json)
+ if opts.json:
+ dt.SaveJSON(opts.json)