Mike Frysinger | f1ba7ad | 2022-09-12 05:42:57 -0400 | [diff] [blame] | 1 | # Copyright 2020 The ChromiumOS Authors |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Run xz from PATH with a thread for each core in the system.""" |
| 6 | |
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 7 | from __future__ import division |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 8 | |
George Burgess IV | 757887f | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 9 | import getopt |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 10 | import os |
George Burgess IV | 757887f | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 11 | import subprocess |
| 12 | import sys |
Mike Frysinger | 687ab9d | 2020-02-06 00:35:15 -0500 | [diff] [blame] | 13 | |
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 14 | from chromite.lib import commandline |
| 15 | from chromite.lib import osutils |
| 16 | from chromite.utils import memoize |
| 17 | |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 18 | |
# Environment variable names used as test-only switches. Only their presence
# in os.environ is checked; the value is ignored.
#
# If set, HasPixz() reports pixz as unavailable, so xz is used instead.
PIXZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_PIXZ"
# If set, ExecDecompressCommand() skips its small-file fallback to xz and
# decompresses with pixz regardless of the input size.
XZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_XZ_DECOMPRESSION"
George Burgess IV | 757887f | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 21 | |
| 22 | |
@memoize.Memoize
def HasPixz():
    """Tells whether pixz should be used.

    Returns:
        False if PIXZ_DISABLE_VAR is present in the environment; otherwise
        whatever `osutils.Which("pixz")` reports (presumably the path to pixz,
        or a falsy value if it is not on PATH).
    """
    if PIXZ_DISABLE_VAR in os.environ:
        return False
    return osutils.Which("pixz")
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 27 | |
| 28 | |
def ParsePixzArgs(argv):
    """Splits argv into pixz flags and up to two positional file names.

    Args:
        argv: The list of command line arguments to inspect.

    Returns:
        A tuple containing:
        - A flat list of flags (each followed by its value, if it has a
          non-empty one) to pass to pixz.
        - The first positional argument (the input file), or None.
        - The second positional argument (the output file), or None.
        Any further positional arguments are dropped.
    """
    # These are the short options pixz documents. -i and -o are rejected in
    # `main`, but are still listed so that they parse cleanly here.
    parsed_opts, positionals = getopt.gnu_getopt(
        args=argv,
        shortopts="dlxi:o:0123456789p:tkch",
    )

    # Pad with None so that missing positionals unpack as None.
    file_to_compress, target = (list(positionals) + [None, None])[:2]

    raw_flag_list = [
        token
        for flag, value in parsed_opts
        for token in ((flag, value) if value else (flag,))
    ]

    return raw_flag_list, file_to_compress, target
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 62 | |
| 63 | |
def Execvp(argv):
    """Replaces the current process with the command described by argv.

    Args:
        argv: The full argument vector; argv[0] names the program to run.
    """
    program = argv[0]
    os.execvp(program, argv)
George Burgess IV | 757887f | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 67 | |
| 68 | |
def ExecCompressCommand(stdout, argv):
    """Execs compression command, preferring pixz and falling back to xz.

    This is not expected to return: it either hands the process over to the
    compressor via `Execvp`, or runs pixz as a child and exits with its
    return code.

    Args:
        stdout: Whether to write the compressed data to standard output.
        argv: The remaining command line arguments (flags and, optionally,
            an input file and an output file) to forward to the compressor.
    """
    # It appears that in order for pixz to do parallel decompression,
    # compression needs to be done with pixz. xz itself is only capable of
    # parallel compression.
    if not HasPixz():
        cmd = ["xz"]

        if stdout:
            cmd.append("-zc")
        else:
            cmd.append("-z")
        cmd += argv
        Execvp(cmd)

    cmd = ["pixz"]
    raw_flag_list, compressed_file_name, output_file = ParsePixzArgs(argv)

    # Pixz treats tarballs specially: if it detects that a tarball has been
    # passed to it, it'll also write a small index in the output file that
    # makes operations like listing the tar faster. If this tar autodetection
    # is enabled and pixz is asked to compress an empty file, it breaks. In
    # addition, these indices have no apparent impact on decompression
    # parallelism, so they're not super useful to us. Disable the feature
    # wholesale.
    if "-t" not in raw_flag_list:
        raw_flag_list.append("-t")

    autodelete_input_file = False
    if not compressed_file_name:
        assert not output_file
        compressed_file_name = "/dev/stdin"
        output_file = "/dev/stdout"
    elif stdout:
        output_file = "/dev/stdout"
    elif not output_file:
        # Pixz defaults to a `.pxz` suffix (or `.tpxz` if it's compressing a
        # tar file). We need the suffix to be consistent, so force it here.
        output_file = f"{compressed_file_name}.xz"
        # Since pixz is handed an explicit output name, removing the input
        # after a successful run is done here. Honor `-k` (keep input) so
        # that this path agrees with the xz fallback above, which receives
        # `-k` directly and keeps the input file.
        autodelete_input_file = "-k" not in raw_flag_list

    cmd += raw_flag_list
    cmd.append(compressed_file_name)
    if output_file:
        cmd.append(output_file)

    if not autodelete_input_file:
        Execvp(cmd)

    # We need to regain control after pixz finishes in order to remove the
    # input, so run it as a child process rather than exec'ing it.
    return_code = subprocess.call(cmd)
    if not return_code:
        os.unlink(compressed_file_name)
    sys.exit(return_code)
George Burgess IV | 757887f | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 122 | |
| 123 | |
def ExecXzDecompressCommand(stdout, argv):
    """Execs `xz` in decompression mode with the given params.

    Args:
        stdout: Whether xz should write the decompressed data to stdout.
        argv: Additional arguments to append to the xz command line.
    """
    mode_flag = "-dc" if stdout else "-d"
    Execvp(["xz", mode_flag, *argv])
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 133 | |
| 134 | |
def ExecDecompressCommand(stdout, argv):
    """Execs decompression command, using pixz when it is available.

    Args:
        stdout: Whether to write the decompressed data to standard output.
        argv: The remaining command line arguments (flags and, optionally,
            an input file and an output file).
    """
    if not HasPixz():
        ExecXzDecompressCommand(stdout, argv)

    pixz_flags, source_path, dest_path = ParsePixzArgs(argv)
    cmd = ["pixz", "-d"] + pixz_flags

    # An output file without an input file makes no sense.
    assert source_path or not dest_path
    if not source_path:
        Execvp(cmd)

    # HACK: Given a file, pixz seeks around in it looking for the file's
    # index. If the originally compressed file was empty and no index was
    # requested, lzma reports no entries and pixz errors out rather than
    # coping with that.
    #
    # Passing `-t` is not an option, because files both with and without
    # indices must be supported, and `-t` makes pixz occasionally fail on tar
    # files that do have indices. :(
    #
    # Empty xz files were observed to be 32 bytes, so simply hand anything of
    # at most 4KB to xz. pixz archives are xz-compatible anyway.
    small_file_limit = 4 * 1024
    use_xz_for_small_file = (
        XZ_DISABLE_VAR not in os.environ
        and os.path.isfile(source_path)
        and os.path.getsize(source_path) <= small_file_limit
    )
    if use_xz_for_small_file:
        ExecXzDecompressCommand(stdout, argv)

    cmd.append(source_path)

    # Naming the output explicitly makes pixz treat the file above as its
    # input and send the result to stdout, rather than picking an output name
    # on its own.
    dest_path = "/dev/stdout" if stdout else dest_path
    if dest_path:
        cmd.append(dest_path)

    Execvp(cmd)
| 178 | |
George Burgess IV | d67df46 | 2021-10-27 14:31:33 -0700 | [diff] [blame] | 179 | |
def GetParser():
    """Builds the parser for the options this script handles itself.

    Returns:
        A commandline.ArgumentParser that knows about -d/--decompress/
        --uncompress and -c.
    """
    arg_parser = commandline.ArgumentParser(description=__doc__)

    # (option strings, keyword arguments) for each supported option, in the
    # order they are registered.
    option_specs = (
        (
            ("-d", "--decompress", "--uncompress"),
            {
                "help": "Decompress rather than compress.",
                "action": "store_true",
            },
        ),
        (
            ("-c",),
            {
                "dest": "stdout",
                "action": "store_true",
                "help": "Write to standard output and don't delete input files.",
            },
        ),
    )
    for option_strings, option_kwargs in option_specs:
        arg_parser.add_argument(*option_strings, **option_kwargs)

    return arg_parser
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 197 | |
| 198 | |
def main(argv):
    """Compresses or decompresses according to the given arguments.

    Args:
        argv: The command line arguments, excluding the program name.
    """
    parser = GetParser()
    # Parse the arguments we were handed. Calling parse_known_args() with no
    # arguments would silently ignore `argv` and read sys.argv instead.
    known_args, remaining_args = parser.parse_known_args(argv)
    if "-i" in remaining_args or "-o" in remaining_args:
        parser.error("It is invalid to use -i or -o with xz_auto")

    if known_args.decompress:
        ExecDecompressCommand(known_args.stdout, remaining_args)
    else:
        ExecCompressCommand(known_args.stdout, remaining_args)