Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 1 | # Copyright 2020 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Run xz from PATH with a thread for each core in the system.""" |
| 6 | |
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 7 | from __future__ import division |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 8 | |
George Burgess IV | 407ec66 | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 9 | import getopt |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 10 | import os |
Mike Frysinger | 687ab9d | 2020-02-06 00:35:15 -0500 | [diff] [blame] | 11 | |
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 12 | from chromite.lib import commandline |
| 13 | from chromite.lib import osutils |
| 14 | from chromite.utils import memoize |
| 15 | |
Benjamin Gordon | 1f4537f | 2019-12-06 09:10:56 -0700 | [diff] [blame] | 16 | |
George Burgess IV | 407ec66 | 2021-09-22 15:58:35 -0700 | [diff] [blame] | 17 | PIXZ_DISABLE_VAR = 'FOR_TEST_XZ_AUTO_NO_PIXZ' |
| 18 | |
| 19 | |
@memoize.Memoize
def HasPixz():
    """Report whether pixz should be used for (de)compression.

    Returns:
        True if `osutils.Which` finds `pixz` and the environment variable
        named by PIXZ_DISABLE_VAR is unset or empty; False otherwise. Note
        that this is a boolean, not the path to the binary.
    """
    # Look up the binary first so the environment is only consulted when pixz
    # is actually available. Coerce to bool so callers never see None.
    return bool(osutils.Which('pixz')) and not os.environ.get(PIXZ_DISABLE_VAR)
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 24 | |
| 25 | |
def BasePixzCommand(jobs):
    """Build the leading argv for a pixz invocation.

    Args:
        jobs: Value passed to pixz's `-p` flag; it is stringified as-is.

    Returns:
        A new list: `['pixz', '-p', <jobs as a string>]`.
    """
    job_count = str(jobs)
    return ['pixz', '-p', job_count]
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 29 | |
| 30 | |
def BaseXzCommand(jobs):
    """Build the leading argv for an xz invocation.

    Args:
        jobs: Value appended directly to xz's `-T` flag.

    Returns:
        A new list: `['xz', '-T<jobs>']`.
    """
    threads_flag = '-T{}'.format(jobs)
    return ['xz', threads_flag]
| 34 | |
| 35 | |
def DetermineFilesPassedToPixz(argv):
    """Extract the first two positional arguments from a pixz-style argv.

    Args:
        argv: Argument list (options and positionals) destined for pixz.

    Returns:
        A `(file_to_compress, target)` tuple holding the first and second
        positional arguments; each is None when not present. Positionals
        beyond the second are ignored.

    Raises:
        getopt.GetoptError: if argv contains an option outside the short
            option set parsed here.
    """
    # Short options taken from a glance at the pixz docs. -i and -o are
    # parsed for completeness even though `main` rejects them up front.
    _, positionals = getopt.gnu_getopt(argv, 'dlxi:o:0123456789p:tkch')

    # Pad with None so the two slots can be unpacked unconditionally.
    file_to_compress, target = (positionals[:2] + [None, None])[:2]
    return file_to_compress, target
| 56 | |
| 57 | |
def GetCompressCommand(stdout, jobs, argv):
    """Build the command prefix used to compress.

    Args:
        stdout: Truthy if output should go to standard output.
        jobs: Parallelism value forwarded to the base command builder.
        argv: Remaining user arguments; inspected only to find positional
            file names when pixz is in use.

    Returns:
        A list of command-line tokens for pixz if `HasPixz()` is truthy,
        otherwise for xz.
    """
    if not HasPixz():
        # Plain xz: -z compresses, the extra `c` writes to stdout.
        return BaseXzCommand(jobs) + ['-zc' if stdout else '-z']

    # It appears that in order for pixz to do parallel decompression,
    # compression needs to be done with pixz. xz itself is only capable of
    # parallel compression.
    pixz_cmd = BasePixzCommand(jobs)
    input_name, output_name = DetermineFilesPassedToPixz(argv)

    if not input_name:
        # No file was named, so read from stdin explicitly.
        pixz_cmd.extend(['-i', '/dev/stdin'])
    elif not stdout and not output_name:
        # Pixz defaults to a `.pxz` suffix (or `.tpxz` if it's compressing a
        # tar file). We need the suffix to be consistent, so force it here.
        pixz_cmd.extend(['-o', f'{input_name}.xz'])
    return pixz_cmd
| 85 | |
| 86 | |
def GetDecompressCommand(stdout, jobs, argv):
    """Build the command prefix used to decompress.

    Args:
        stdout: Truthy if output should go to standard output.
        jobs: Parallelism value forwarded to the base command builder.
        argv: Remaining user arguments; inspected only to find a positional
            file name when pixz is in use.

    Returns:
        A list of command-line tokens for pixz if `HasPixz()` is truthy,
        otherwise for xz. In the pixz + stdout + named-file case the list
        ends with a bare `-i`, so whatever the caller appends next is taken
        as its value.
    """
    if not HasPixz():
        # Plain xz: -d decompresses, the extra `c` writes to stdout.
        return BaseXzCommand(jobs) + ['-dc' if stdout else '-d']

    pixz_cmd = BasePixzCommand(jobs) + ['-d']
    input_name, _ = DetermineFilesPassedToPixz(argv)
    reads_stdin = not input_name

    # With stdout requested, explicitly tell pixz the file is the input, so it
    # will dump the output to stdout instead of automatically choosing an
    # output name. With no file at all, the input must be stdin.
    if stdout or reads_stdin:
        pixz_cmd.append('-i')
    if reads_stdin:
        pixz_cmd.append('/dev/stdin')
    return pixz_cmd
Ben Pastene | c6bf549 | 2020-08-28 17:35:01 -0700 | [diff] [blame] | 110 | |
| 111 | |
def GetParser():
    """Construct the argument parser for this script.

    Returns:
        A `commandline.ArgumentParser` that knows two boolean flags:
        `-d`/`--decompress`/`--uncompress` (stored as `decompress`) and `-c`
        (stored as `stdout`).
    """
    parser = commandline.ArgumentParser(description=__doc__)

    decompress_flags = ('-d', '--decompress', '--uncompress')
    parser.add_argument(
        *decompress_flags,
        action='store_true',
        help='Decompress rather than compress.')

    parser.add_argument(
        '-c',
        action='store_true',
        dest='stdout',
        help="Write to standard output and don't delete input files.")

    return parser
| 127 | |
| 128 | |
def main(argv):
    """Pick pixz or xz and exec it with the user's remaining arguments.

    Args:
        argv: Command-line arguments, excluding the program name.

    Does not return when `os.execvp` succeeds, since it replaces the current
    process. `-i` and `-o` are rejected through `parser.error` (presumably
    exiting, as with argparse -- confirm against chromite's ArgumentParser).
    """
    parser = GetParser()
    # Parse the argv we were handed instead of implicitly reading sys.argv,
    # so the function honours its parameter. Unrecognised arguments are kept
    # and forwarded to the compressor.
    known_args, argv = parser.parse_known_args(argv)
    if '-i' in argv or '-o' in argv:
        parser.error('It is invalid to use -i or -o with xz_auto')

    # Use half of our CPUs to avoid starving other processes. os.cpu_count()
    # can return None when the count is undeterminable; treat that as one CPU
    # instead of failing on `None // 2`.
    jobs = max(1, (os.cpu_count() or 1) // 2)

    if known_args.decompress:
        args = GetDecompressCommand(known_args.stdout, jobs, argv)
    else:
        args = GetCompressCommand(known_args.stdout, jobs, argv)

    os.execvp(args[0], args + argv)