blob: df52200335774dea843e0cc4e3f88424301ff6d3 [file] [log] [blame]
Benjamin Gordon1f4537f2019-12-06 09:10:56 -07001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Run xz from PATH with a thread for each core in the system."""
6
Ben Pastenec6bf5492020-08-28 17:35:01 -07007from __future__ import division
Benjamin Gordon1f4537f2019-12-06 09:10:56 -07008
George Burgess IV757887f2021-09-22 15:58:35 -07009import getopt
Benjamin Gordon1f4537f2019-12-06 09:10:56 -070010import os
George Burgess IV757887f2021-09-22 15:58:35 -070011import subprocess
12import sys
Mike Frysinger687ab9d2020-02-06 00:35:15 -050013
Ben Pastenec6bf5492020-08-28 17:35:01 -070014from chromite.lib import commandline
15from chromite.lib import osutils
16from chromite.utils import memoize
17
Benjamin Gordon1f4537f2019-12-06 09:10:56 -070018
Alex Klein1699fab2022-09-08 08:46:06 -060019PIXZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_PIXZ"
20XZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_XZ_DECOMPRESSION"
George Burgess IV757887f2021-09-22 15:58:35 -070021
22
@memoize.Memoize
def HasPixz():
    """Reports whether pixz should be used.

    Returns:
        False if PIXZ_DISABLE_VAR is set in the environment; otherwise
        whatever `osutils.Which("pixz")` returns (presumably the path to
        pixz, or a falsy value if it is not on PATH).
    """
    # The env var lets tests force the plain-xz code paths.
    if PIXZ_DISABLE_VAR in os.environ:
        return False
    return osutils.Which("pixz")
Ben Pastenec6bf5492020-08-28 17:35:01 -070027
28
def ParsePixzArgs(argv):
    """Splits argv into pixz flags and the input/output file operands.

    Args:
        argv: The command line arguments to interpret.

    Returns:
        A 3-tuple of:
          - The flat list of flags (each flag followed by its value, if it
            has one) to hand to pixz.
          - The input file, or None if no positional argument was given.
          - The output file, or None if fewer than two positional arguments
            were given. Positional arguments past the second are dropped.
    """
    # These are the short options pixz documents. `main` rejects -i and -o
    # before we get here, but they are still parsed so their values are not
    # mistaken for positional arguments.
    opts, positionals = getopt.gnu_getopt(
        args=argv,
        shortopts="dlxi:o:0123456789p:tkch",
    )

    # Pad with None so missing operands unpack cleanly; only the first two
    # positionals are meaningful.
    file_to_compress, target = (list(positionals) + [None, None])[:2]

    raw_flag_list = []
    for flag, value in opts:
        raw_flag_list.extend([flag, value] if value else [flag])

    return raw_flag_list, file_to_compress, target
Ben Pastenec6bf5492020-08-28 17:35:01 -070062
63
def Execvp(argv):
    """Replaces the current process with the command described by argv.

    Args:
        argv: The full command line; argv[0] is the program to execute.
    """
    program = argv[0]
    os.execvp(program, argv)
George Burgess IV757887f2021-09-22 15:58:35 -070067
68
def ExecCompressCommand(stdout, argv):
    """Runs the compression command, preferring pixz over xz.

    Args:
        stdout: Whether the compressed data should be written to stdout.
        argv: The remaining command line arguments (flags and file names).
    """
    # pixz can only decompress in parallel if the archive was also produced
    # by pixz; xz by itself only parallelizes compression. So xz is just the
    # fallback for when pixz is unavailable.
    if not HasPixz():
        xz_mode = "-zc" if stdout else "-z"
        Execvp(["xz", xz_mode, *argv])

    pixz_flags, source, destination = ParsePixzArgs(argv)

    # By default pixz autodetects tarballs and appends a small index to
    # speed up things like listing. That autodetection breaks on empty input
    # files, and the index does not appear to help decompression
    # parallelism, so always turn it off.
    if "-t" not in pixz_flags:
        pixz_flags.append("-t")

    remove_source_after = False
    if not source:
        assert not destination
        source, destination = "/dev/stdin", "/dev/stdout"
    elif stdout:
        destination = "/dev/stdout"
    elif not destination:
        # pixz would pick `.pxz` (or `.tpxz` for tar files) on its own; force
        # `.xz` so the suffix is consistent.
        destination = f"{source}.xz"
        remove_source_after = True

    pixz_cmd = ["pixz", *pixz_flags, source]
    if destination:
        pixz_cmd.append(destination)

    if not remove_source_after:
        Execvp(pixz_cmd)

    # The input has to be deleted after a successful run, so pixz is run as
    # a child process instead of exec'ing it.
    status = subprocess.call(pixz_cmd)
    if not status:
        os.unlink(source)
    sys.exit(status)
George Burgess IV757887f2021-09-22 15:58:35 -0700122
123
def ExecXzDecompressCommand(stdout, argv):
    """Execs `xz` in decompression mode with the given arguments.

    Args:
        stdout: Whether the decompressed data should be written to stdout.
        argv: The remaining arguments, passed to xz unchanged.
    """
    mode_flag = "-dc" if stdout else "-d"
    Execvp(["xz", mode_flag, *argv])
Ben Pastenec6bf5492020-08-28 17:35:01 -0700133
134
def ExecDecompressCommand(stdout, argv):
    """Runs the decompression command, preferring pixz over xz.

    Args:
        stdout: Whether the decompressed data should be written to stdout.
        argv: The remaining command line arguments (flags and file names).
    """
    if not HasPixz():
        ExecXzDecompressCommand(stdout, argv)

    pixz_flags, archive, destination = ParsePixzArgs(argv)
    pixz_cmd = ["pixz", "-d", *pixz_flags]

    assert archive or not destination
    if not archive:
        Execvp(pixz_cmd)

    # HACK: Given a file, pixz seeks around in it looking for its index. If
    # the originally compressed file was empty and no index was requested,
    # lzma reports no entries and pixz errors out because it handles that
    # poorly.
    #
    # Passing `-t` is not an option: archives both with and without indices
    # must be supported, and `-t` makes pixz fail occasionally on tar files
    # that do have indices. :(
    #
    # Empty xz files were 32 bytes when this was checked, so anything up to
    # 4KB is simply handed to xz. pixz archives are xz-compatible anyway.
    xz_fallback_allowed = XZ_DISABLE_VAR not in os.environ
    if (
        xz_fallback_allowed
        and os.path.isfile(archive)
        and os.path.getsize(archive) <= 4 * 1024
    ):
        ExecXzDecompressCommand(stdout, argv)

    pixz_cmd.append(archive)

    # Naming the output explicitly makes pixz treat the file as its input
    # and write to stdout, rather than picking an output name on its own.
    target = "/dev/stdout" if stdout else destination
    if target:
        pixz_cmd.append(target)

    Execvp(pixz_cmd)
178
George Burgess IVd67df462021-10-27 14:31:33 -0700179
def GetParser():
    """Builds the command line parser for this script.

    Returns:
        A commandline.ArgumentParser that understands the decompress and
        stdout switches.
    """
    arg_parser = commandline.ArgumentParser(description=__doc__)

    # Mode switch: compress by default, decompress on request.
    arg_parser.add_argument(
        "-d",
        "--decompress",
        "--uncompress",
        action="store_true",
        help="Decompress rather than compress.",
    )

    # Output switch, stored on the parsed namespace as `stdout`.
    arg_parser.add_argument(
        "-c",
        action="store_true",
        dest="stdout",
        help="Write to standard output and don't delete input files.",
    )

    return arg_parser
Ben Pastenec6bf5492020-08-28 17:35:01 -0700197
198
def main(argv):
    """Parses argv and dispatches to compression or decompression.

    Args:
        argv: The command line arguments (without the program name).
    """
    parser = GetParser()
    # Parse the argv we were handed. Calling parse_known_args() with no
    # arguments would silently fall back to sys.argv[1:] and ignore the
    # caller's arguments. Unrecognized arguments are kept and forwarded to
    # the underlying tool.
    known_args, argv = parser.parse_known_args(argv)
    if "-i" in argv or "-o" in argv:
        parser.error("It is invalid to use -i or -o with xz_auto")

    if known_args.decompress:
        ExecDecompressCommand(known_args.stdout, argv)
    else:
        ExecCompressCommand(known_args.stdout, argv)