Blame - script/print-lzw-example.go - skia.googlesource.com/external/github.com/google/wuffs

blob: ab09d6a5b589691241ff81c7edd500062b3541cd [file] [log] [blame]

Nigel Tao	b0c7286	2018-10-21 16:03:47 +1100	[diff] [blame^]	1	// Copyright 2018 The Wuffs Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	// +build ignore
				16
				17	package main
				18
				19	// print-lzw-example.go prints a worked example discussed in std/lzw/README.md,
				20	// based on three implementations of a simplified core of the LZW algorithm.
				21	// Simplifications include assuming LSB-first, 8 bit literals, no clear codes,
				22	// no full tables and that the input is given in terms of a code stream, not a
				23	// bit stream.
				24	//
				25	// The implementations are designed for ease of studying, and for e.g. a
				26	// minimal diff between the suf1 and sufQ implementations, and are not
				27	// optimized for performance.
				28	//
				29	// Usage: go run print-lzw-example.go
				30	//
				31	// You can also play with the code (e.g. modify Q and re-run) online:
				32	// https://play.golang.org/p/1aLC_XHZzoA
				33
				34	import (
				35	"fmt"
				36	)
				37
				38	const (
				39	expectedOutput = "TOBEORNOTTOBEORTOBEORNOTXOTXOTXOOTXOOOTXOOOTOBEY"
				40
				41	clearCode = 0x100
				42	endCode = 0x101
				43
				44	// Neither clearCode or endCode can ever be a valid prefix code. We
				45	// arbitrarily pick clearCode to represent "no prefix".
				46	noPrefix = clearCode
				47
				48	// Q is the maximum possible suffix length, in bytes, which equals 1 plus
				49	// the maximum possible "suffix length minus 1". Q == 1 means that sufQ is
				50	// equivalent to suf1.
				51	//
				52	// In std/lzw/README.md, Q is 3 to keep the example short and simple. In
				53	// practice, especially on modern CPUs that can do unaligned 32 or 64 bit
				54	// loads and stores, Q is 4 or 8.
				55	Q = 3
				56	)
				57
				58	var codes = []int{
				59	'T', 'O', 'B', 'E', 'O', 'R', 'N', 'O', 'T',
				60	0x102, 0x104, 0x106, 0x10B, 0x105, 0x107, 0x109,
				61	'X',
				62	0x111, 0x113, 0x114, 0x115, 0x10E,
				63	'Y',
				64	0x101,
				65	}
				66
				67	func codeString(code int) string {
				68	if code < clearCode {
				69	return fmt.Sprintf("%5c", code)
				70	} else if code == noPrefix {
				71	return " -"
				72	}
				73	return fmt.Sprintf("0x%03X", code)
				74	}
				75
				76	func main() {
				77	n, s, q := new(naive), new(suf1), new(sufQ)
				78	decode(n)
				79	decode(s)
				80	decode(q)
				81
				82	key := endCode
				83	fmt.Printf(" Code Emits Key Value Pre1+Suf1 LM1 /Q %%Q PreQ+SufQ\n")
				84	for _, code := range codes {
				85	// Code
				86	fmt.Printf("%s", codeString(code))
				87
				88	// Emits
				89	if code == endCode {
				90	fmt.Printf(" -end-\n")
				91	break
				92	}
				93	fmt.Printf(" %6s", n[code])
				94
				95	if key != endCode {
				96	fmt.Printf(" 0x%03X %7s %s %c %3d %3d %3d %s %s",
				97	key, // Key
				98	n[key], // Value
				99	codeString(int(s.prefixes[key])), // Pre1
				100	s.suffixes[key], // Suf1
				101	q.lengthM1s[key], // LM1
				102	q.lengthM1s[key]/Q, // /Q
				103	q.lengthM1s[key]%Q, // %Q
				104	codeString(int(q.prefixes[key])), // PreQ
				105	string(q.suffixes[key][:]), // SufQ
				106	)
				107	}
				108
				109	fmt.Println()
				110	key++
				111	}
				112	}
				113
				114	type implementation interface {
				115	initialize()
				116	emit(buffer []byte, code int, prevCode int, n int) []byte
				117	}
				118
				119	func decode(impl implementation) {
				120	impl.initialize()
				121
				122	output, prevCode, n := []byte(nil), -1, 0x101
				123	for _, code := range codes {
				124	if code == clearCode {
				125	panic("unimplemented clearCode")
				126	} else if code == endCode {
				127	if string(output) != expectedOutput {
				128	panic("unexpected output")
				129	}
				130	return
				131	} else if code > n {
				132	panic("invalid code")
				133	}
				134
				135	// We have a literal or copy code.
				136	output = impl.emit(output, code, prevCode, n)
				137
				138	prevCode, n = code, n+1
				139	if n >= 4096 {
				140	panic("unimplemented table-is-full")
				141	}
				142	}
				143	panic("missing endCode")
				144	}
				145
				146	// naive is a simple implementation that, in the worst case, requires O(N*N)
				147	// memory.
				148	type naive [4096]string
				149
				150	func (t *naive) initialize() {
				151	for i := 0; i < clearCode; i++ {
				152	t[i] = string([]byte{byte(i)})
				153	}
				154	}
				155
				156	func (t *naive) emit(output []byte, code int, prevCode int, n int) []byte {
				157	if prevCode >= 0 {
				158	prevOutput := t[prevCode]
				159	if code < n {
				160	t[n] = prevOutput + t[code][:1]
				161	} else {
				162	t[n] = prevOutput + prevOutput[:1]
				163	}
				164	}
				165	return append(output, t[code]...)
				166	}
				167
				168	// suf1 keeps separate prefix and suffix tables, 1 byte per suffix.
				169	type suf1 struct {
				170	prefixes [4096]uint16
				171	suffixes [4096]byte
				172	}
				173
				174	func (t *suf1) initialize() {
				175	for i := 0; i < clearCode; i++ {
				176	t.prefixes[i] = noPrefix
				177	t.suffixes[i] = byte(i)
				178	}
				179	}
				180
				181	func (t *suf1) emit(output []byte, code int, prevCode int, n int) []byte {
				182	// Fill an intermediate buffer from back to front, 1 byte at a time.
				183	buffer := [4096]byte{}
				184	i := len(buffer)
				185
				186	c := code
				187	if code == n {
				188	c = prevCode
				189	i--
				190	// buffer[4095] will be set later.
				191	}
				192
				193	firstByte := byte(0)
				194	for {
				195	suffix := t.suffixes[c]
				196	i--
				197	buffer[i] = suffix
				198	c = int(t.prefixes[c])
				199	if c == noPrefix {
				200	firstByte = suffix
				201	break
				202	}
				203	}
				204
				205	if code == n {
				206	buffer[4095] = firstByte
				207	}
				208
				209	// The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
				210	// 0, is correct conceptually, but unnecessary in practice. Look for "fake
				211	// key-value pair" in std/lzw/README.md.
				212	if prevCode >= 0 {
				213	t.prefixes[n] = uint16(prevCode)
				214	t.suffixes[n] = firstByte
				215	}
				216
				217	return append(output, buffer[i:4096]...)
				218	}
				219
				220	// sufQ keeps separate prefix and suffix tables, up to Q bytes per suffix.
				221	type sufQ struct {
				222	lengthM1s [4096]uint16
				223	prefixes [4096]uint16
				224	suffixes [4096][Q]byte
				225	}
				226
				227	func (t *sufQ) initialize() {
				228	for i := 0; i < clearCode; i++ {
				229	t.lengthM1s[i] = 0
				230	t.prefixes[i] = noPrefix
				231	t.suffixes[i][0] = byte(i)
				232	for j := 1; j < Q; j++ {
				233	// Unnecessary for correct output, but makes the printed table prettier.
				234	t.suffixes[i][j] = '.'
				235	}
				236	}
				237	}
				238
				239	func (t *sufQ) emit(output []byte, code int, prevCode int, n int) []byte {
				240	// Fill an intermediate buffer from back to front, Q bytes at a time.
				241	buffer := [4096 + Q - 1]byte{}
				242	i := len(buffer)
				243
				244	c := code
				245	if code == n {
				246	c = prevCode
				247	i--
				248	// buffer[4095] will be set later.
				249	}
				250
				251	i -= int(t.lengthM1s[c] % Q)
				252
				253	firstByte := byte(0)
				254	for {
				255	suffix := t.suffixes[c]
				256	i -= Q
				257	copy(buffer[i:i+Q], suffix[:])
				258	c = int(t.prefixes[c])
				259	if c == noPrefix {
				260	firstByte = suffix[0]
				261	break
				262	}
				263	}
				264
				265	if code == n {
				266	buffer[4095] = firstByte
				267	}
				268
				269	// The "if prevCode >= 0" guard, and initializing prevCode to -1 instead of
				270	// 0, is correct conceptually, but unnecessary in practice. Look for "fake
				271	// key-value pair" in std/lzw/README.md.
				272	if prevCode >= 0 {
				273	lm1 := t.lengthM1s[prevCode] + 1
				274	t.lengthM1s[n] = lm1
				275	lm1 %= Q
				276	if lm1 != 0 {
				277	// Copy the prevCode's prefix and suffix, and then extend the suffix.
				278	t.prefixes[n] = t.prefixes[prevCode]
				279	t.suffixes[n] = t.suffixes[prevCode]
				280	t.suffixes[n][lm1] = firstByte
				281	} else {
				282	// Start a new suffix.
				283	t.prefixes[n] = uint16(prevCode)
				284	t.suffixes[n][0] = firstByte
				285	for j := 1; j < Q; j++ {
				286	// Unnecessary for correct output, but makes the printed table prettier.
				287	t.suffixes[n][j] = '.'
				288	}
				289	}
				290	}
				291
				292	return append(output, buffer[i:4096]...)
				293	}