blob: 6001e948fab78c7c949f5d94ed1246399baf474c [file] [log] [blame]
Nigel Taob0c72862018-10-21 16:03:47 +11001// Copyright 2018 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Nigel Tao788479d2021-08-22 10:52:51 +100015//go:build ignore
Nigel Taob0c72862018-10-21 16:03:47 +110016// +build ignore
17
18package main
19
20// print-lzw-example.go prints a worked example discussed in std/lzw/README.md,
21// based on three implementations of a simplified core of the LZW algorithm.
22// Simplifications include assuming LSB-first, 8 bit literals, no clear codes,
23// no full tables and that the input is given in terms of a code stream, not a
24// bit stream.
25//
26// The implementations are designed for ease of studying, and for e.g. a
27// minimal diff between the suf1 and sufQ implementations, and are not
28// optimized for performance.
29//
30// Usage: go run print-lzw-example.go
31//
32// You can also play with the code (e.g. modify Q and re-run) online:
33// https://play.golang.org/p/1aLC_XHZzoA
34
35import (
36 "fmt"
Nigel Taod587c9e2020-03-29 22:10:49 +110037 "os"
Nigel Taob0c72862018-10-21 16:03:47 +110038)
39
40const (
41 expectedOutput = "TOBEORNOTTOBEORTOBEORNOTXOTXOTXOOTXOOOTXOOOTOBEY"
42
43 clearCode = 0x100
44 endCode = 0x101
45
46 // Neither clearCode or endCode can ever be a valid prefix code. We
47 // arbitrarily pick clearCode to represent "no prefix".
48 noPrefix = clearCode
49
50 // Q is the maximum possible suffix length, in bytes, which equals 1 plus
51 // the maximum possible "suffix length minus 1". Q == 1 means that sufQ is
52 // equivalent to suf1.
53 //
54 // In std/lzw/README.md, Q is 3 to keep the example short and simple. In
55 // practice, especially on modern CPUs that can do unaligned 32 or 64 bit
56 // loads and stores, Q is 4 or 8.
57 Q = 3
58)
59
60var codes = []int{
61 'T', 'O', 'B', 'E', 'O', 'R', 'N', 'O', 'T',
62 0x102, 0x104, 0x106, 0x10B, 0x105, 0x107, 0x109,
63 'X',
64 0x111, 0x113, 0x114, 0x115, 0x10E,
65 'Y',
66 0x101,
67}
68
69func codeString(code int) string {
70 if code < clearCode {
71 return fmt.Sprintf("%5c", code)
72 } else if code == noPrefix {
73 return " -"
74 }
75 return fmt.Sprintf("0x%03X", code)
76}
77
78func main() {
Nigel Taod587c9e2020-03-29 22:10:49 +110079 if err := main1(); err != nil {
80 os.Stderr.WriteString(err.Error() + "\n")
81 os.Exit(1)
82 }
83}
84
85func main1() error {
Nigel Taob0c72862018-10-21 16:03:47 +110086 n, s, q := new(naive), new(suf1), new(sufQ)
87 decode(n)
88 decode(s)
89 decode(q)
90
91 key := endCode
92 fmt.Printf(" Code Emits Key Value Pre1+Suf1 LM1 /Q %%Q PreQ+SufQ\n")
93 for _, code := range codes {
94 // Code
95 fmt.Printf("%s", codeString(code))
96
97 // Emits
98 if code == endCode {
99 fmt.Printf(" -end-\n")
100 break
101 }
102 fmt.Printf(" %6s", n[code])
103
104 if key != endCode {
105 fmt.Printf(" 0x%03X %7s %s %c %3d %3d %3d %s %s",
106 key, // Key
107 n[key], // Value
108 codeString(int(s.prefixes[key])), // Pre1
109 s.suffixes[key], // Suf1
110 q.lengthM1s[key], // LM1
111 q.lengthM1s[key]/Q, // /Q
112 q.lengthM1s[key]%Q, // %Q
113 codeString(int(q.prefixes[key])), // PreQ
114 string(q.suffixes[key][:]), // SufQ
115 )
116 }
117
118 fmt.Println()
119 key++
120 }
Nigel Taod587c9e2020-03-29 22:10:49 +1100121 return nil
Nigel Taob0c72862018-10-21 16:03:47 +1100122}
123
124type implementation interface {
125 initialize()
126 emit(buffer []byte, code int, prevCode int, n int) []byte
127}
128
129func decode(impl implementation) {
130 impl.initialize()
131
132 output, prevCode, n := []byte(nil), -1, 0x101
133 for _, code := range codes {
134 if code == clearCode {
135 panic("unimplemented clearCode")
136 } else if code == endCode {
137 if string(output) != expectedOutput {
138 panic("unexpected output")
139 }
140 return
141 } else if code > n {
142 panic("invalid code")
143 }
144
145 // We have a literal or copy code.
146 output = impl.emit(output, code, prevCode, n)
147
148 prevCode, n = code, n+1
149 if n >= 4096 {
150 panic("unimplemented table-is-full")
151 }
152 }
153 panic("missing endCode")
154}
155
156// naive is a simple implementation that, in the worst case, requires O(N*N)
157// memory.
158type naive [4096]string
159
160func (t *naive) initialize() {
161 for i := 0; i < clearCode; i++ {
162 t[i] = string([]byte{byte(i)})
163 }
164}
165
166func (t *naive) emit(output []byte, code int, prevCode int, n int) []byte {
167 if prevCode >= 0 {
168 prevOutput := t[prevCode]
169 if code < n {
170 t[n] = prevOutput + t[code][:1]
171 } else {
172 t[n] = prevOutput + prevOutput[:1]
173 }
174 }
175 return append(output, t[code]...)
176}
177
178// suf1 keeps separate prefix and suffix tables, 1 byte per suffix.
179type suf1 struct {
180 prefixes [4096]uint16
181 suffixes [4096]byte
182}
183
184func (t *suf1) initialize() {
185 for i := 0; i < clearCode; i++ {
186 t.prefixes[i] = noPrefix
187 t.suffixes[i] = byte(i)
188 }
189}
190
191func (t *suf1) emit(output []byte, code int, prevCode int, n int) []byte {
192 // Fill an intermediate buffer from back to front, 1 byte at a time.
193 buffer := [4096]byte{}
194 i := len(buffer)
195
196 c := code
197 if code == n {
198 c = prevCode
199 i--
200 // buffer[4095] will be set later.
201 }
202
203 firstByte := byte(0)
204 for {
205 suffix := t.suffixes[c]
206 i--
207 buffer[i] = suffix
208 c = int(t.prefixes[c])
209 if c == noPrefix {
210 firstByte = suffix
211 break
212 }
213 }
214
215 if code == n {
216 buffer[4095] = firstByte
217 }
218
219 // The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
220 // 0, is correct conceptually, but unnecessary in practice. Look for "fake
221 // key-value pair" in std/lzw/README.md.
222 if prevCode >= 0 {
223 t.prefixes[n] = uint16(prevCode)
224 t.suffixes[n] = firstByte
225 }
226
227 return append(output, buffer[i:4096]...)
228}
229
230// sufQ keeps separate prefix and suffix tables, up to Q bytes per suffix.
231type sufQ struct {
232 lengthM1s [4096]uint16
233 prefixes [4096]uint16
234 suffixes [4096][Q]byte
235}
236
237func (t *sufQ) initialize() {
238 for i := 0; i < clearCode; i++ {
239 t.lengthM1s[i] = 0
240 t.prefixes[i] = noPrefix
241 t.suffixes[i][0] = byte(i)
242 for j := 1; j < Q; j++ {
243 // Unnecessary for correct output, but makes the printed table prettier.
244 t.suffixes[i][j] = '.'
245 }
246 }
247}
248
249func (t *sufQ) emit(output []byte, code int, prevCode int, n int) []byte {
250 // Fill an intermediate buffer from back to front, Q bytes at a time.
251 buffer := [4096 + Q - 1]byte{}
252 i := len(buffer)
253
254 c := code
255 if code == n {
256 c = prevCode
257 i--
258 // buffer[4095] will be set later.
259 }
260
261 i -= int(t.lengthM1s[c] % Q)
262
263 firstByte := byte(0)
264 for {
265 suffix := t.suffixes[c]
266 i -= Q
267 copy(buffer[i:i+Q], suffix[:])
268 c = int(t.prefixes[c])
269 if c == noPrefix {
270 firstByte = suffix[0]
271 break
272 }
273 }
274
275 if code == n {
276 buffer[4095] = firstByte
277 }
278
279 // The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
280 // 0, is correct conceptually, but unnecessary in practice. Look for "fake
281 // key-value pair" in std/lzw/README.md.
282 if prevCode >= 0 {
283 lm1 := t.lengthM1s[prevCode] + 1
284 t.lengthM1s[n] = lm1
285 lm1 %= Q
286 if lm1 != 0 {
287 // Copy the prevCode's prefix and suffix, and then extend the suffix.
288 t.prefixes[n] = t.prefixes[prevCode]
289 t.suffixes[n] = t.suffixes[prevCode]
290 t.suffixes[n][lm1] = firstByte
291 } else {
292 // Start a new suffix.
293 t.prefixes[n] = uint16(prevCode)
294 t.suffixes[n][0] = firstByte
295 for j := 1; j < Q; j++ {
296 // Unnecessary for correct output, but makes the printed table prettier.
297 t.suffixes[n][j] = '.'
298 }
299 }
300 }
301
302 return append(output, buffer[i:4096]...)
303}