blob: f6b302f5063cf1c380ca738ca7be7c933541733e [file] [log] [blame]
Benjamin Gordon1f4537f2019-12-06 09:10:56 -07001# Copyright 2020 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Run xz from PATH with a thread for each core in the system."""
6
Ben Pastenec6bf5492020-08-28 17:35:01 -07007from __future__ import division
Benjamin Gordon1f4537f2019-12-06 09:10:56 -07008
George Burgess IV757887f2021-09-22 15:58:35 -07009import getopt
Benjamin Gordon1f4537f2019-12-06 09:10:56 -070010import os
George Burgess IV757887f2021-09-22 15:58:35 -070011import subprocess
12import sys
Mike Frysinger687ab9d2020-02-06 00:35:15 -050013
Ben Pastenec6bf5492020-08-28 17:35:01 -070014from chromite.lib import commandline
15from chromite.lib import osutils
16from chromite.utils import memoize
17
Benjamin Gordon1f4537f2019-12-06 09:10:56 -070018
# Names of environment variables consulted by this script. Only their presence
# in the environment is checked, never their value. The FOR_TEST_ prefix
# suggests they exist for tests -- TODO confirm.
#
# When present, HasPixz() reports pixz as unavailable, so plain xz is used.
PIXZ_DISABLE_VAR = 'FOR_TEST_XZ_AUTO_NO_PIXZ'
# When present, ExecDecompressCommand() skips its xz fallback for small files.
XZ_DISABLE_VAR = 'FOR_TEST_XZ_AUTO_NO_XZ_DECOMPRESSION'
George Burgess IV757887f2021-09-22 15:58:35 -070021
22
@memoize.Memoize
def HasPixz():
  """Reports whether pixz should be used.

  Returns:
    False if PIXZ_DISABLE_VAR is present in the environment; otherwise
    whatever osutils.Which('pixz') returns (per the original docstring, the
    path to pixz if it is on PATH, or None).
  """
  if PIXZ_DISABLE_VAR in os.environ:
    return False
  return osutils.Which('pixz')
Ben Pastenec6bf5492020-08-28 17:35:01 -070027
28
def ParsePixzArgs(argv):
  """Splits argv into pixz flags and up to two positional file names.

  Args:
    argv: List of command-line argument strings to parse.

  Returns:
    A tuple containing:
      - A flat list of flags to pass to pixz; an option's value (if any)
        directly follows the option as a separate element.
      - The first positional argument (the input file), or None.
      - The second positional argument (the output file), or None. Any
        positional arguments beyond the second are ignored.

  Raises:
    getopt.GetoptError: If argv has an option missing from the short-option
      list below, or an option that is missing its required value.
  """
  # Per the pixz docs, these are the supported options. -i and -o are expected
  # to be rejected in `main` before we get here, but they are parsed anyway.
  parsed_opts, positionals = getopt.gnu_getopt(
      args=argv,
      shortopts='dlxi:o:0123456789p:tkch',
  )

  # Pad with None so zero or one positional still unpacks into two names.
  source, destination = (list(positionals[:2]) + [None, None])[:2]

  pixz_flags = []
  for name, value in parsed_opts:
    # Options without a value come back with an empty string; drop it.
    pixz_flags.extend([name, value] if value else [name])

  return pixz_flags, source, destination
Ben Pastenec6bf5492020-08-28 17:35:01 -070062
63
def Execvp(argv):
  """Replaces the current process with the command described by argv.

  Args:
    argv: Full argument vector; argv[0] is the program to run.
  """
  program = argv[0]
  os.execvp(program, argv)
67
68
def ExecCompressCommand(stdout, argv):
  """Execs a compression command, using pixz when available and xz otherwise.

  On the pixz path, if only an input file is named and `stdout` is false, the
  output is written to `<input>.xz`. The input file is then removed once pixz
  succeeds, unless `-k` was passed, and this function exits with pixz's
  return code.

  Args:
    stdout: If true, write the compressed data to standard output.
    argv: Remaining command-line arguments (flags and optional file names).
  """
  # It appears that in order for pixz to do parallel decompression, compression
  # needs to be done with pixz. xz itself is only capable of parallel
  # compression.
  if not HasPixz():
    cmd = ['xz']

    if stdout:
      cmd.append('-zc')
    else:
      cmd.append('-z')
    cmd += argv
    Execvp(cmd)

  cmd = ['pixz']
  raw_flag_list, compressed_file_name, output_file = ParsePixzArgs(argv)

  # Pixz treats tarballs specially: if it detects that a tarball has been
  # passed to it, it'll also write a small index in the output file that
  # makes operations like listing the tar faster. If this tar autodetection
  # is enabled and pixz is asked to compress an empty file, it breaks. In
  # addition, these indices have no apparent impact on decompression
  # parallelism, so they're not super useful to us. Disable the feature
  # wholesale.
  if '-t' not in raw_flag_list:
    raw_flag_list.append('-t')

  autodelete_input_file = False
  if not compressed_file_name:
    assert not output_file
    compressed_file_name = '/dev/stdin'
    output_file = '/dev/stdout'
  elif stdout:
    output_file = '/dev/stdout'
  elif not output_file:
    # Pixz defaults to a `.pxz` suffix (or `.tpxz` if it's compressing a
    # tar file). We need the suffix to be consistent, so force it here.
    output_file = f'{compressed_file_name}.xz'
    # Because the output name is given explicitly, removing the input is done
    # here rather than left to pixz. `-k` asks to keep the input, as it does
    # on the xz path, so only delete when it is absent.
    autodelete_input_file = '-k' not in raw_flag_list

  cmd += raw_flag_list
  cmd.append(compressed_file_name)
  if output_file:
    cmd.append(output_file)

  if not autodelete_input_file:
    Execvp(cmd)

  # We must outlive pixz to delete the input, so run it as a child process
  # instead of exec'ing it, and only delete when it reports success.
  return_code = subprocess.call(cmd)
  if not return_code:
    os.unlink(compressed_file_name)
  sys.exit(return_code)
122
123
def ExecXzDecompressCommand(stdout, argv):
  """Execs `xz` in decompression mode with the given arguments.

  Args:
    stdout: If true, use `-dc` so output goes to standard output; otherwise
      use `-d`.
    argv: Extra arguments appended after the mode flag.
  """
  mode_flag = '-dc' if stdout else '-d'
  Execvp(['xz', mode_flag, *argv])
Ben Pastenec6bf5492020-08-28 17:35:01 -0700133
134
def ExecDecompressCommand(stdout, argv):
  """Execs a decompression command, using pixz when available and xz otherwise.

  Args:
    stdout: If true, write the decompressed data to standard output.
    argv: Remaining command-line arguments (flags and optional file names).
  """
  if not HasPixz():
    ExecXzDecompressCommand(stdout, argv)

  pixz_flags, archive_path, destination = ParsePixzArgs(argv)
  pixz_cmd = ['pixz', '-d', *pixz_flags]

  # An output file is only meaningful when an input file was given.
  assert archive_path or not destination
  if not archive_path:
    Execvp(pixz_cmd)

  # HACK: Given a file, pixz seeks around in it looking for the file's index.
  # If the originally compressed file was empty and no index was requested,
  # lzma reports no entries and pixz errors out, as it doesn't cope with that.
  #
  # Both indexed and index-less files must be supported, so `-t` can't be
  # passed: doing so makes pixz fail occasionally on tar files that do have
  # indices. :(
  #
  # At the time this was checked, empty xz files were 32 bytes, so simply use
  # xz for anything of at most 4KB. pixz archives are xz-compatible anyway.
  small_file_limit = 4 * 1024
  xz_fallback_allowed = XZ_DISABLE_VAR not in os.environ
  if (xz_fallback_allowed and
      os.path.isfile(archive_path) and
      os.path.getsize(archive_path) <= small_file_limit):
    ExecXzDecompressCommand(stdout, argv)

  pixz_cmd.append(archive_path)

  # Name the output explicitly so pixz writes to stdout rather than picking
  # an output file name on its own.
  if stdout:
    destination = '/dev/stdout'

  if destination:
    pixz_cmd.append(destination)

  Execvp(pixz_cmd)
176
177
def GetParser():
  """Builds the parser for the flags that xz_auto interprets itself.

  Returns:
    A commandline.ArgumentParser with the -d/--decompress/--uncompress and -c
    flags registered; the module docstring is used as its description.
  """
  arg_parser = commandline.ArgumentParser(description=__doc__)

  decompress_names = ('-d', '--decompress', '--uncompress')
  arg_parser.add_argument(
      *decompress_names,
      action='store_true',
      help='Decompress rather than compress.')

  arg_parser.add_argument(
      '-c',
      action='store_true',
      dest='stdout',
      help="Write to standard output and don't delete input files.")

  return arg_parser
193
194
def main(argv):
  """Parses xz_auto's own flags and execs the matching (de)compression command.

  Args:
    argv: List of argument strings to parse (what argparse would otherwise
      read from sys.argv[1:]).
  """
  parser = GetParser()
  # Parse the argv we were handed. Calling parse_known_args() with no argument
  # silently falls back to sys.argv, ignoring what the caller passed in.
  # Arguments the parser doesn't recognise are returned for the exec'd tool.
  known_args, argv = parser.parse_known_args(argv)
  if '-i' in argv or '-o' in argv:
    parser.error('It is invalid to use -i or -o with xz_auto')

  if known_args.decompress:
    ExecDecompressCommand(known_args.stdout, argv)
  else:
    ExecCompressCommand(known_args.stdout, argv)