// Copyright 2017 The Wuffs Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Nigel Tao4efce302017-07-06 16:35:18 +100014
Nigel Tao788479d2021-08-22 10:52:51 +100015//go:build ignore
Nigel Tao4efce302017-07-06 16:35:18 +100016// +build ignore
17
18package main
19
// extract-deflate-offsets.go extracts the start and end offsets of the
// deflate-compressed data wrapped in a .gz file. For each file named on the
// command line it prints one line holding the start offset, the end offset,
// the MD5 hash of the uncompressed data and the filename.
//
// Usage: go run extract-deflate-offsets.go foo.gz bar.gz
//
// Alternatively, to write the raw deflate data to foo.deflate instead:
// go run extract-deflate-offsets.go -write-deflate foo.gz
//
// Alternatively, to re-wrap the deflate data as zlib in foo.zlib instead:
// go run extract-deflate-offsets.go -write-zlib foo.gz
Nigel Tao4efce302017-07-06 16:35:18 +100028
29import (
30 "bytes"
31 "compress/flate"
Nigel Tao03572372017-07-14 12:00:00 +100032 "compress/zlib"
Nigel Tao4efce302017-07-06 16:35:18 +100033 "crypto/md5"
Nigel Tao03572372017-07-14 12:00:00 +100034 "flag"
Nigel Tao4efce302017-07-06 16:35:18 +100035 "fmt"
Nigel Tao03572372017-07-14 12:00:00 +100036 "hash/adler32"
Nigel Tao226c4762021-08-22 11:05:43 +100037 "io"
Nigel Tao4efce302017-07-06 16:35:18 +100038 "os"
Nigel Tao03572372017-07-14 12:00:00 +100039 "strings"
40)
41
// writeDeflate, when set, makes decode write each input's raw deflate payload
// to a file instead of printing offsets. decode tests it before writeZlib.
var writeDeflate = flag.Bool("write-deflate", false, "whether to convert gzip to raw deflate")

// writeZlib, when set (and writeDeflate is not), makes decode write each
// input's deflate payload re-wrapped as zlib instead of printing offsets.
var writeZlib = flag.Bool("write-zlib", false, "whether to convert gzip to zlib")
46
// GZIP wraps a header and footer around deflate data. The format is described
// in RFC 1952: https://www.ietf.org/rfc/rfc1952.txt
//
// These are the bit masks of the header's flags byte (the fourth header
// byte), listed from the least significant bit upwards.
const (
	flagText    = 1 << iota // 0x01
	flagHCRC                // 0x02
	flagExtra               // 0x04
	flagName                // 0x08
	flagComment             // 0x10
)
56
57func main() {
58 if err := main1(); err != nil {
59 os.Stderr.WriteString(err.Error() + "\n")
60 os.Exit(1)
61 }
62}
63
64func main1() error {
Nigel Tao03572372017-07-14 12:00:00 +100065 flag.Parse()
66 for _, a := range flag.Args() {
Nigel Tao4efce302017-07-06 16:35:18 +100067 if err := decode(a); err != nil {
68 return err
69 }
70 }
71 return nil
72}
73
74func decode(filename string) error {
Nigel Tao226c4762021-08-22 11:05:43 +100075 src, err := os.ReadFile(filename)
Nigel Tao4efce302017-07-06 16:35:18 +100076 if err != nil {
77 return err
78 }
79
80 const (
81 headerSize = 10
82 footerSize = 8
83 )
84 if len(src) < headerSize+footerSize || src[0] != 0x1F || src[1] != 0x8B || src[2] != 0x08 {
85 return fmt.Errorf("not a GZIP")
86 }
87 if len(src) >= 0x10000000 {
88 return fmt.Errorf("file too large")
89 }
90 flags := src[3]
91 i := headerSize
92 src = src[:len(src)-footerSize]
93
94 if flags&flagExtra != 0 {
95 return fmt.Errorf("TODO: support gzip extra flag")
96 }
97
98 if flags&flagName != 0 {
99 if i, err = readString(src, i); err != nil {
100 return err
101 }
102 }
103
104 if flags&flagComment != 0 {
105 if i, err = readString(src, i); err != nil {
106 return err
107 }
108 }
109
110 if flags&flagHCRC != 0 {
111 return fmt.Errorf("TODO: support gzip HCRC flag")
112 }
113
Nigel Tao2f788042021-01-23 19:29:19 +1100114 // As a coherence check, the result should be valid deflate.
Nigel Tao802b9d82017-12-04 10:43:32 +1100115 uncompressed, err := checkDeflate(src[i:])
Nigel Tao4efce302017-07-06 16:35:18 +1000116 if err != nil {
117 return err
118 }
119
Nigel Tao802b9d82017-12-04 10:43:32 +1100120 if *writeDeflate {
121 return doWriteDeflate(src[i:], uncompressed, filename)
Nigel Tao547351c2017-07-17 16:39:32 +1000122 } else if *writeZlib {
Nigel Tao03572372017-07-14 12:00:00 +1000123 return doWriteZlib(src[i:], uncompressed, filename)
124 }
125 fmt.Printf("%7d %7d %x %s\n", i, len(src), md5.Sum(uncompressed), filename)
126 return nil
127}
128
// doWriteDeflate saves deflateCompressed to a file named after filename: a
// trailing ".gz", if present, is removed and ".deflate" is appended. On
// success it prints "wrote <name>" to standard output.
//
// The uncompressed argument is not used; the signature parallels doWriteZlib.
func doWriteDeflate(deflateCompressed []byte, uncompressed []byte, filename string) error {
	outName := strings.TrimSuffix(filename, ".gz") + ".deflate"
	err := os.WriteFile(outName, deflateCompressed, 0666)
	if err == nil {
		fmt.Printf("wrote %s\n", outName)
	}
	return err
}
140
Nigel Tao802b9d82017-12-04 10:43:32 +1100141func doWriteZlib(deflateCompressed []byte, uncompressed []byte, filename string) error {
Nigel Tao03572372017-07-14 12:00:00 +1000142 buf := bytes.NewBuffer(nil)
143 // The ZLIB header (as per https://www.ietf.org/rfc/rfc1950.txt) is 2
144 // bytes.
145 //
Nigel Tao802b9d82017-12-04 10:43:32 +1100146 // The first byte's low 4 bits is the compression method: 8 means deflate.
Nigel Tao03572372017-07-14 12:00:00 +1000147 // The first byte's high 4 bits is the compression info: 7 means a 32KiB
Nigel Tao802b9d82017-12-04 10:43:32 +1100148 // deflate window size.
Nigel Tao03572372017-07-14 12:00:00 +1000149 //
150 // The second byte's low 5 bits are a parity check. The 5th bit (0 in this
151 // case) indicates a preset dictionary. The high 2 bits (2 in this case)
152 // means the default compression algorithm.
153 buf.WriteString("\x78\x9c")
154 // Write the payload.
Nigel Tao802b9d82017-12-04 10:43:32 +1100155 buf.Write(deflateCompressed)
Nigel Tao03572372017-07-14 12:00:00 +1000156 // The ZLIB footer is 4 bytes: a big-endian checksum.
157 checksum := adler32.Checksum(uncompressed)
158 buf.WriteByte(uint8(checksum >> 24))
159 buf.WriteByte(uint8(checksum >> 16))
160 buf.WriteByte(uint8(checksum >> 8))
161 buf.WriteByte(uint8(checksum >> 0))
162
163 asZlib := buf.Bytes()
164
Nigel Tao2f788042021-01-23 19:29:19 +1100165 // As a coherence check, the result should be valid zlib.
Nigel Tao03572372017-07-14 12:00:00 +1000166 if _, err := checkZlib(asZlib); err != nil {
167 return err
168 }
169
170 if strings.HasSuffix(filename, ".gz") {
171 filename = filename[:len(filename)-3]
172 }
173 filename += ".zlib"
Nigel Tao226c4762021-08-22 11:05:43 +1000174 if err := os.WriteFile(filename, asZlib, 0666); err != nil {
Nigel Tao03572372017-07-14 12:00:00 +1000175 return err
176 }
177 fmt.Printf("wrote %s\n", filename)
Nigel Tao4efce302017-07-06 16:35:18 +1000178 return nil
179}
180
// readString skips the NUL-terminated string that starts at src[i]. It
// returns the index just past the terminating zero byte, or 0 and an error if
// no zero byte occurs at or after index i.
func readString(src []byte, i int) (int, error) {
	if i < len(src) {
		if rel := bytes.IndexByte(src[i:], 0); rel >= 0 {
			return i + rel + 1, nil
		}
	}
	return 0, fmt.Errorf("bad GZIP string")
}
192
// checkDeflate verifies that x is exactly one complete raw deflate stream and
// returns its decompressed contents.
//
// It returns an error if x does not decompress cleanly, or if bytes remain in
// x after the end of the deflate stream. The second check matters because
// callers treat the whole of x as the deflate payload. The decoder error, if
// any, is wrapped with %w so callers can inspect it with errors.Is/errors.As.
func checkDeflate(x []byte) ([]byte, error) {
	// *bytes.Reader implements io.ByteReader, so the flate reader consumes no
	// more input than the stream needs; whatever is left over is trailing
	// data.
	br := bytes.NewReader(x)
	rc := flate.NewReader(br)
	defer rc.Close()
	out, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid deflate: %w", err)
	}
	if n := br.Len(); n != 0 {
		return nil, fmt.Errorf("data is not valid deflate: %d unexpected trailing bytes", n)
	}
	return out, nil
}
202
// checkZlib verifies that x is exactly one complete zlib stream and returns
// its decompressed contents.
//
// It returns an error if the zlib header is invalid, if the data does not
// decompress cleanly (reading to the end also makes the zlib reader verify
// the trailing checksum), or if bytes remain in x after the stream. Reader
// errors are wrapped with %w so callers can inspect them with
// errors.Is/errors.As.
func checkZlib(x []byte) ([]byte, error) {
	// *bytes.Reader implements io.ByteReader, so the decompressor is not
	// expected to read past the stream; leftover bytes are trailing data.
	br := bytes.NewReader(x)
	rc, err := zlib.NewReader(br)
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	defer rc.Close()
	out, err := io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("data is not valid zlib: %w", err)
	}
	if n := br.Len(); n != 0 {
		return nil, fmt.Errorf("data is not valid zlib: %d unexpected trailing bytes", n)
	}
	return out, nil
}