Nigel Tao | 79a9455 | 2017-11-30 16:37:20 +1100 | [diff] [blame] | 1 | // Copyright 2017 The Wuffs Authors. |
Nigel Tao | d4372cb | 2017-10-12 11:17:41 +1100 | [diff] [blame] | 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 14 | |
Nigel Tao | 788479d | 2021-08-22 10:52:51 +1000 | [diff] [blame] | 15 | //go:build ignore |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 16 | // +build ignore |
| 17 | |
| 18 | package main |
| 19 | |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 20 | // extract-deflate-offsets.go extracts the start and end offsets of the |
| 21 | // deflate-compressed data wrapped in a .gz file. |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 22 | // |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 23 | // Usage: go run extract-deflate-offsets.go foo.gz bar.gz |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 24 | // |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 25 | // Alternatively: go run extract-deflate-offsets.go -write-deflate foo.gz |
Nigel Tao | 547351c | 2017-07-17 16:39:32 +1000 | [diff] [blame] | 26 | // |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 27 | // Alternatively: go run extract-deflate-offsets.go -write-zlib foo.gz |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 28 | |
| 29 | import ( |
| 30 | "bytes" |
| 31 | "compress/flate" |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 32 | "compress/zlib" |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 33 | "crypto/md5" |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 34 | "flag" |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 35 | "fmt" |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 36 | "hash/adler32" |
Nigel Tao | 226c476 | 2021-08-22 11:05:43 +1000 | [diff] [blame] | 37 | "io" |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 38 | "os" |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 39 | "strings" |
| 40 | ) |
| 41 | |
// writeDeflate is the -write-deflate command line flag. When set, decode
// writes the raw deflate payload of each .gz input to a sibling file instead
// of printing offsets.
var writeDeflate = flag.Bool("write-deflate", false, "whether to convert gzip to raw deflate")

// writeZlib is the -write-zlib command line flag. When set (and -write-deflate
// is not), decode writes each .gz input's payload re-wrapped as zlib.
var writeZlib = flag.Bool("write-zlib", false, "whether to convert gzip to zlib")
| 46 | |
// GZIP wraps a header and footer around deflate data. The format is described
// in RFC 1952: https://www.ietf.org/rfc/rfc1952.txt
//
// These are the bit masks for the header's flags byte, one bit per constant,
// starting at the least significant bit.
const (
	flagText    = 1 << iota // bit 0
	flagHCRC                // bit 1
	flagExtra               // bit 2
	flagName                // bit 3
	flagComment             // bit 4
)
| 56 | |
| 57 | func main() { |
| 58 | if err := main1(); err != nil { |
| 59 | os.Stderr.WriteString(err.Error() + "\n") |
| 60 | os.Exit(1) |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | func main1() error { |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 65 | flag.Parse() |
| 66 | for _, a := range flag.Args() { |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 67 | if err := decode(a); err != nil { |
| 68 | return err |
| 69 | } |
| 70 | } |
| 71 | return nil |
| 72 | } |
| 73 | |
| 74 | func decode(filename string) error { |
Nigel Tao | 226c476 | 2021-08-22 11:05:43 +1000 | [diff] [blame] | 75 | src, err := os.ReadFile(filename) |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 76 | if err != nil { |
| 77 | return err |
| 78 | } |
| 79 | |
| 80 | const ( |
| 81 | headerSize = 10 |
| 82 | footerSize = 8 |
| 83 | ) |
| 84 | if len(src) < headerSize+footerSize || src[0] != 0x1F || src[1] != 0x8B || src[2] != 0x08 { |
| 85 | return fmt.Errorf("not a GZIP") |
| 86 | } |
| 87 | if len(src) >= 0x10000000 { |
| 88 | return fmt.Errorf("file too large") |
| 89 | } |
| 90 | flags := src[3] |
| 91 | i := headerSize |
| 92 | src = src[:len(src)-footerSize] |
| 93 | |
| 94 | if flags&flagExtra != 0 { |
| 95 | return fmt.Errorf("TODO: support gzip extra flag") |
| 96 | } |
| 97 | |
| 98 | if flags&flagName != 0 { |
| 99 | if i, err = readString(src, i); err != nil { |
| 100 | return err |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | if flags&flagComment != 0 { |
| 105 | if i, err = readString(src, i); err != nil { |
| 106 | return err |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | if flags&flagHCRC != 0 { |
| 111 | return fmt.Errorf("TODO: support gzip HCRC flag") |
| 112 | } |
| 113 | |
Nigel Tao | 2f78804 | 2021-01-23 19:29:19 +1100 | [diff] [blame] | 114 | // As a coherence check, the result should be valid deflate. |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 115 | uncompressed, err := checkDeflate(src[i:]) |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 116 | if err != nil { |
| 117 | return err |
| 118 | } |
| 119 | |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 120 | if *writeDeflate { |
| 121 | return doWriteDeflate(src[i:], uncompressed, filename) |
Nigel Tao | 547351c | 2017-07-17 16:39:32 +1000 | [diff] [blame] | 122 | } else if *writeZlib { |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 123 | return doWriteZlib(src[i:], uncompressed, filename) |
| 124 | } |
| 125 | fmt.Printf("%7d %7d %x %s\n", i, len(src), md5.Sum(uncompressed), filename) |
| 126 | return nil |
| 127 | } |
| 128 | |
// doWriteDeflate writes deflateCompressed, unmodified, to a file whose name is
// filename with any trailing ".gz" removed and ".deflate" appended. On success
// it reports the name of the file written on standard output.
//
// The uncompressed argument is not used; the signature mirrors doWriteZlib.
func doWriteDeflate(deflateCompressed []byte, uncompressed []byte, filename string) error {
	outName := strings.TrimSuffix(filename, ".gz") + ".deflate"
	err := os.WriteFile(outName, deflateCompressed, 0666)
	if err == nil {
		fmt.Printf("wrote %s\n", outName)
	}
	return err
}
| 140 | |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 141 | func doWriteZlib(deflateCompressed []byte, uncompressed []byte, filename string) error { |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 142 | buf := bytes.NewBuffer(nil) |
| 143 | // The ZLIB header (as per https://www.ietf.org/rfc/rfc1950.txt) is 2 |
| 144 | // bytes. |
| 145 | // |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 146 | // The first byte's low 4 bits is the compression method: 8 means deflate. |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 147 | // The first byte's high 4 bits is the compression info: 7 means a 32KiB |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 148 | // deflate window size. |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 149 | // |
| 150 | // The second byte's low 5 bits are a parity check. The 5th bit (0 in this |
| 151 | // case) indicates a preset dictionary. The high 2 bits (2 in this case) |
| 152 | // means the default compression algorithm. |
| 153 | buf.WriteString("\x78\x9c") |
| 154 | // Write the payload. |
Nigel Tao | 802b9d8 | 2017-12-04 10:43:32 +1100 | [diff] [blame] | 155 | buf.Write(deflateCompressed) |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 156 | // The ZLIB footer is 4 bytes: a big-endian checksum. |
| 157 | checksum := adler32.Checksum(uncompressed) |
| 158 | buf.WriteByte(uint8(checksum >> 24)) |
| 159 | buf.WriteByte(uint8(checksum >> 16)) |
| 160 | buf.WriteByte(uint8(checksum >> 8)) |
| 161 | buf.WriteByte(uint8(checksum >> 0)) |
| 162 | |
| 163 | asZlib := buf.Bytes() |
| 164 | |
Nigel Tao | 2f78804 | 2021-01-23 19:29:19 +1100 | [diff] [blame] | 165 | // As a coherence check, the result should be valid zlib. |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 166 | if _, err := checkZlib(asZlib); err != nil { |
| 167 | return err |
| 168 | } |
| 169 | |
| 170 | if strings.HasSuffix(filename, ".gz") { |
| 171 | filename = filename[:len(filename)-3] |
| 172 | } |
| 173 | filename += ".zlib" |
Nigel Tao | 226c476 | 2021-08-22 11:05:43 +1000 | [diff] [blame] | 174 | if err := os.WriteFile(filename, asZlib, 0666); err != nil { |
Nigel Tao | 0357237 | 2017-07-14 12:00:00 +1000 | [diff] [blame] | 175 | return err |
| 176 | } |
| 177 | fmt.Printf("wrote %s\n", filename) |
Nigel Tao | 4efce30 | 2017-07-06 16:35:18 +1000 | [diff] [blame] | 178 | return nil |
| 179 | } |
| 180 | |
// readString skips over the NUL-terminated string that starts at src[i]. It
// returns the index just past the terminating zero byte, or an error if src
// ends before a zero byte is found.
func readString(src []byte, i int) (int, error) {
	for ; i < len(src); i++ {
		if src[i] == 0 {
			return i + 1, nil
		}
	}
	return 0, fmt.Errorf("bad GZIP string")
}
| 192 | |
// checkDeflate decompresses x as a raw deflate stream and returns the
// uncompressed bytes.
//
// If x does not decompress cleanly it returns a nil slice and an error that
// wraps the decompressor's error, so callers can still inspect the cause with
// errors.Is or errors.As.
func checkDeflate(x []byte) ([]byte, error) {
	rc := flate.NewReader(bytes.NewReader(x))
	defer rc.Close()
	uncompressed, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid deflate: %w", err)
	}
	return uncompressed, nil
}
| 202 | |
// checkZlib decompresses x as a zlib stream and returns the uncompressed
// bytes.
//
// If x is rejected, either when the reader is created or while the stream is
// being read, it returns a nil slice and an error that wraps the zlib
// package's error, so callers can still inspect the cause with errors.Is or
// errors.As.
func checkZlib(x []byte) ([]byte, error) {
	rc, err := zlib.NewReader(bytes.NewReader(x))
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	defer rc.Close()
	uncompressed, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	return uncompressed, nil
}