Nigel Tao | ff7bffc | 2019-10-13 23:23:48 +1100 | [diff] [blame] | 1 | // Copyright 2019 The Wuffs Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
Nigel Tao | 788479d | 2021-08-22 10:52:51 +1000 | [diff] [blame] | 15 | //go:build ignore |
Nigel Tao | ff7bffc | 2019-10-13 23:23:48 +1100 | [diff] [blame] | 16 | // +build ignore |
| 17 | |
| 18 | package main |
| 19 | |
| 20 | // print-deflate-huff-table-size.go prints the worst-case number of elements in |
| 21 | // the std/deflate decoder's huffs tables. |
| 22 | // |
| 23 | // Usage: go run print-deflate-huff-table-size.go |
| 24 | // |
| 25 | // This program might take tens of seconds to run. |
| 26 | // |
| 27 | // It is algorithmically similar to examples/enough.c in the zlib C library. |
| 28 | // Much of the initial commentary in that file is also relevant for this |
| 29 | // program. Not all of it applies, though. For example, the Wuffs decoder |
| 30 | // implementation's primary (root) table length is fixed at compile time. It |
| 31 | // does not grow that at run-time, even if the shortest Huffman code is longer |
| 32 | // than that primary length. |
| 33 | // |
| 34 | // This program should print: |
| 35 | // |
| 36 | // ---------------- |
| 37 | // -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15 |
| 38 | // primLen 3: 4376 entries = 8 prim + 4368 seco 1 0 0; 1 1 1 17 1 1 257 1 1 1 1 2 |
| 39 | // primLen 4: 2338 entries = 16 prim + 2322 seco 0 0 0 0; 3 1 1 177 97 1 1 1 1 1 2 |
| 40 | // primLen 5: 1330 entries = 32 prim + 1298 seco 1 0 0 0 0; 3 1 1 177 97 1 1 1 1 2 |
| 41 | // primLen 6: 852 entries = 64 prim + 788 seco 0 0 0 0 0 0; 1 229 49 1 1 1 1 1 2 |
| 42 | // primLen 7: 660 entries = 128 prim + 532 seco 1 0 0 0 0 0 0; 1 229 49 1 1 1 1 2 |
| 43 | // primLen 8: 660 entries = 256 prim + 404 seco 1 1 0 0 0 0 0 0; 1 229 49 1 1 1 2 |
| 44 | // primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2 |
| 45 | // primLen 10: 1332 entries = 1024 prim + 308 seco 1 1 1 1 0 0 0 0 0 0; 1 229 49 1 2 |
| 46 | // primLen 11: 2340 entries = 2048 prim + 292 seco 1 1 1 1 1 0 0 0 0 0 0; 1 229 49 2 |
| 47 | // primLen 12: 4380 entries = 4096 prim + 284 seco 1 1 1 1 1 1 0 0 0 0 0 0; 1 229 50 |
| 48 | // primLen 13: 8472 entries = 8192 prim + 280 seco 1 1 1 1 1 1 0 1 0 0 0 0 0;105 174 |
| 49 | // primLen 14: 16658 entries = 16384 prim + 274 seco 1 1 1 1 1 1 0 1 1 1 0 1 1 1;274 |
| 50 | // primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 |
| 51 | // |
| 52 | // -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15 |
| 53 | // primLen 3: 4120 entries = 8 prim + 4112 seco 1 0 0; 1 9 9 1 1 1 1 1 1 1 1 2 |
| 54 | // primLen 4: 2080 entries = 16 prim + 2064 seco 1 1 0 0; 1 9 9 1 1 1 1 1 1 1 2 |
| 55 | // primLen 5: 1072 entries = 32 prim + 1040 seco 1 1 1 0 0; 1 9 9 1 1 1 1 1 1 2 |
| 56 | // primLen 6: 592 entries = 64 prim + 528 seco 1 1 1 1 0 0; 1 9 9 1 1 1 1 1 2 |
| 57 | // primLen 7: 400 entries = 128 prim + 272 seco 1 1 1 1 1 0 0; 1 9 9 1 1 1 1 2 |
| 58 | // primLen 8: 400 entries = 256 prim + 144 seco 1 1 1 1 1 1 0 0; 1 9 9 1 1 1 2 |
| 59 | // primLen 9: 592 entries = 512 prim + 80 seco 1 1 1 1 1 1 1 0 0; 1 9 9 1 1 2 |
| 60 | // primLen 10: 1072 entries = 1024 prim + 48 seco 1 1 1 1 1 1 1 1 0 0; 1 9 9 1 2 |
| 61 | // primLen 11: 2080 entries = 2048 prim + 32 seco 1 1 1 1 1 1 1 1 1 0 0; 1 9 9 2 |
| 62 | // primLen 12: 4120 entries = 4096 prim + 24 seco 1 1 1 1 1 1 1 1 1 1 0 0; 1 9 10 |
| 63 | // primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14 |
| 64 | // primLen 14: 16400 entries = 16384 prim + 16 seco 1 1 1 1 1 1 1 1 1 1 1 0 0 0; 16 |
| 65 | // primLen 15: 32768 entries = 32768 prim + 0 seco 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 |
| 66 | // ---------------- |
| 67 | // |
| 68 | // Deflate's canonical Huffman codes are up to 15 bits long, but the decoder |
| 69 | // can use fewer than (1 << 15) entries in its look-up table, by splitting it |
| 70 | // into one primary table (of an at-compile-time fixed length <= 15) and zero |
| 71 | // or more secondary tables (of variable lengths, determined at-run-time). |
| 72 | // |
| 73 | // Symbols whose Huffman codes' length are less than or equal to the primary |
| 74 | // table length do not require any secondary tables. Longer codes are grouped |
| 75 | // by their primLen length prefix. Each group occupies one entry in the primary |
| 76 | // table, which redirects to a secondary table of size (1 << (N - primLen)), |
| 77 | // where N is the length of the longest code in that group. |
| 78 | // |
| 79 | // This program calculates that, for a primary table length of 9 bits, the |
| 80 | // worst case Huffman table size is 852 for the Literal/Length table (852 being |
| 81 | // a 512 entry primary table and the secondary tables totalling 340 further |
| 82 | // entries) and 592 (512 + 80) for the Distance table. |
| 83 | // |
| 84 | // Copy/pasting one row of that output (and the column headers): |
| 85 | // |
| 86 | // -------- Lit/Len (up to 286 symbols, 149142 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15 |
| 87 | // primLen 9: 852 entries = 512 prim + 340 seco 1 1 1 0 0 0 0 0 0; 1 229 49 1 1 2 |
| 88 | // |
| 89 | // The "@1, @2, @3, ..." columns mean that one combination that hits that 852 |
| 90 | // worst case is having 1 1-length code, 1 2-length code, 1 3-length code, 0 |
| 91 | // 4-length codes, ..., 1 10-length code, 229 11-length codes, etc. There are a |
| 92 | // total of 286 (1 + 1 + 1 + 0 + ... + 49 + 1 + 1 + 2) codes, for 286 symbols. |
| 93 | // |
| 94 | // See test/data/artificial/deflate-huffman-primlen-9.deflate* for an actual |
| 95 | // example of valid Deflate-formatted data that exercises these worst-case code |
| 96 | // lengths (for a primary table length of 9 bits). |
| 97 | // |
| 98 | // Hypothetically, suppose that we used a primLen of 13 for the Distance table. |
| 99 | // Here is a code length assignment that produces 20 secondary table entries: |
| 100 | // |
| 101 | // -------- Distance (up to 30 symbols, 1666 combos) @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15 |
| 102 | // primLen 13: 8212 entries = 8192 prim + 20 seco 1 1 1 1 1 1 1 1 1 1 0 1 0; 5 14 |
| 103 | // |
| 104 | // In detail: call the 14 length codes "e0", "e1", ..., "e4" and the 15 length |
| 105 | // codes "f00", "f01", "f02", ..., "f13". |
| 106 | // |
| 107 | // Recall that secondary tables' lengths are (1 << (N - primLen)), where N is |
| 108 | // the longest code for a given primLen length prefix. In this case, (N - |
| 109 | // primLen) is (15 - 13) if the secondary table contains a "f??" code, |
| 110 | // otherwise it N is (14 - 13). When the secondary table contains a mixture of |
| 111 | // lengths, some of the shorter codes' entries are duplicated. |
| 112 | // |
| 113 | // Also, for canonical Huffman codes, the codes (bitstrings) for 14-length |
| 114 | // codes are all assigned (sequentially) before any 15-length codes. There are |
| 115 | // therefore 6 secondary tables: |
| 116 | // |
| 117 | // - length 2: e0, e1. |
| 118 | // - length 2: e2, e3. |
| 119 | // - length 4: e4, e4, f00, f01. |
| 120 | // - length 4: f02, f03, f04, f05. |
| 121 | // - length 4: f06, f07, f08, f09. |
| 122 | // - length 4: f10, f11, f12, f13. |
| 123 | // |
| 124 | // The total size of the secondary tables is (2 + 2 + 4 + 4 + 4 + 4) = 20. |
| 125 | |
| 126 | import ( |
| 127 | "fmt" |
| 128 | "os" |
| 129 | "sort" |
| 130 | ) |
| 131 | |
| 132 | const ( |
| 133 | maxMaxNSyms = 286 // RFC 1951 lists 286 literal/length codes and 30 distance codes. |
| 134 | maxCodeLen = 15 // Each code's encoding is at most 15 bits. |
| 135 | ) |
| 136 | |
| 137 | func main() { |
| 138 | if err := main1(); err != nil { |
| 139 | os.Stderr.WriteString(err.Error() + "\n") |
| 140 | os.Exit(1) |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | func main1() error { |
| 145 | do("Lit/Len", 286) |
| 146 | do("Distance", 30) |
| 147 | return nil |
| 148 | } |
| 149 | |
| 150 | func do(name string, maxNSyms uint32) { |
| 151 | ftab := &feasibleTable{} |
| 152 | ftab.build(maxNSyms) |
| 153 | |
| 154 | // We can do the per-primLen computations concurrently, speeding up the |
| 155 | // overall wall time taken. |
| 156 | ch := make(chan string) |
| 157 | numPending := 0 |
| 158 | for primLen := uint32(3); primLen <= maxCodeLen; primLen++ { |
| 159 | go doPrimLen(ch, maxNSyms, ftab, primLen) |
| 160 | numPending++ |
| 161 | } |
| 162 | |
| 163 | // Gather, sort and print the results for each primLen. |
| 164 | results := make([]string, 0, numPending) |
| 165 | for ; numPending > 0; numPending-- { |
| 166 | results = append(results, <-ch) |
| 167 | } |
| 168 | sort.Strings(results) |
| 169 | fmt.Printf("\n-------- %8s (up to %3d symbols, %6d combos)"+ |
| 170 | " @1 @2 @3 @4 @5 @6 @7 @8 @9 @10 @11 @12 @13 @14 @15\n", |
| 171 | name, maxNSyms, ftab.count()) |
| 172 | for _, s := range results { |
| 173 | fmt.Println(s) |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | type state struct { |
| 178 | ftab *feasibleTable |
| 179 | primLen uint32 |
| 180 | |
| 181 | currCase struct { |
| 182 | nAssignedCodes nAssignedCodes |
| 183 | } |
| 184 | |
| 185 | worstCase struct { |
| 186 | nAssignedCodes nAssignedCodes |
| 187 | nSecondary uint32 |
| 188 | } |
| 189 | |
| 190 | visited [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bitVector |
| 191 | } |
| 192 | |
| 193 | func doPrimLen(ch chan<- string, maxNSyms uint32, ftab *feasibleTable, primLen uint32) { |
| 194 | z := &state{ |
| 195 | ftab: ftab, |
| 196 | primLen: primLen, |
| 197 | } |
| 198 | |
| 199 | if z.primLen > maxCodeLen { |
| 200 | panic("unreachable") |
| 201 | |
| 202 | } else if z.primLen == maxCodeLen { |
| 203 | // There's only the primary table and no secondary tables. For an |
| 204 | // example that fills that primary table, create a trivial encoding of |
| 205 | // two symbols: one symbol's code is "0", the other's is "1". |
| 206 | z.worstCase.nAssignedCodes[1] = 2 |
| 207 | for n := uint32(2); n <= maxCodeLen; n++ { |
| 208 | z.worstCase.nAssignedCodes[n] = 0 |
| 209 | } |
| 210 | |
| 211 | } else { |
| 212 | // Brute force search the tree of possible code length assignments. |
| 213 | // |
| 214 | // nCodes is always even: the number of unassigned Huffman codes at |
| 215 | // length L is twice the number at length (L-1). At the start, there |
| 216 | // are two unassigned codes of length 1: "0" and "1". |
| 217 | // |
| 218 | // nSyms starts at nCodes, since (nCodes > nSyms) is infeasible: there |
| 219 | // will be unassigned codes. |
| 220 | for nCodes := uint32(2); nCodes <= maxNSyms; nCodes += 2 { |
| 221 | for nSyms := nCodes; nSyms <= maxNSyms; nSyms++ { |
| 222 | if z.ftab[z.primLen+1][nCodes][nSyms] { |
| 223 | z.visit(z.primLen+1, nCodes, nSyms, 0, 0) |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | z.worstCase.nAssignedCodes.fillPrimaryValues(z.primLen) |
| 229 | } |
| 230 | |
| 231 | // Collect the details: the number of codes assigned to each length in the |
| 232 | // worst case. |
| 233 | details := "" |
| 234 | for n := uint32(1); n <= maxCodeLen; n++ { |
| 235 | if n == z.primLen+1 { |
| 236 | details += ";" |
| 237 | } else { |
| 238 | details += " " |
| 239 | } |
| 240 | details += fmt.Sprintf("%3d", z.worstCase.nAssignedCodes[n]) |
| 241 | } |
| 242 | |
| 243 | nPrimary := uint32(1) << z.primLen |
| 244 | nSecondary := z.worstCase.nSecondary |
| 245 | nEntries := nPrimary + nSecondary |
| 246 | |
| 247 | ch <- fmt.Sprintf("primLen %2d: %5d entries = %5d prim + %4d seco%s", |
| 248 | z.primLen, nEntries, nPrimary, nSecondary, details) |
| 249 | } |
| 250 | |
| 251 | // visit updates z.worstCase, starting with nCodes unassigned Huffman codes of |
| 252 | // length codeLen for nSyms symbols. nSecondary is the total size (number of |
| 253 | // entries) so far of all of the secondary tables. |
| 254 | // |
| 255 | // nRemain > 0 iff we have an incomplete secondary table, whose size was |
| 256 | // partially accounted for in nSecondary, by (1 << (codeLen - z.primLen)). If |
| 257 | // positive, nRemain is the number of unassigned entries in that incomplete |
| 258 | // secondary table. |
| 259 | // |
| 260 | // visit can recursively call itself with longer codeLen values. As it does so, |
| 261 | // it temporarily sets z.currCase.nAssignedCodes[codeLen], restoring that to |
| 262 | // zero before it returns. |
| 263 | func (z *state) visit(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) { |
| 264 | if z.alreadyVisited(codeLen, nCodes, nSyms, nSecondary, nRemain) { |
| 265 | // No-op. |
| 266 | |
| 267 | } else if nCodes > nSyms { |
| 268 | // Infeasible: there will be unassigned codes. |
| 269 | |
| 270 | } else if nCodes == nSyms { |
| 271 | // Fill in the incomplete secondary table (if present) and any new |
| 272 | // secondary tables (if necessary) to assign the nCodes codes to the |
| 273 | // nSyms symbols. At the end, we should have no remaining entries. |
| 274 | n := nCodes |
| 275 | for n > nRemain { |
| 276 | n -= nRemain |
| 277 | nRemain = 1 << (codeLen - z.primLen) |
| 278 | nSecondary += nRemain |
| 279 | } |
| 280 | if n != nRemain { |
| 281 | panic("inconsistent secondary table size") |
| 282 | } |
| 283 | |
| 284 | // Update z.worstCase if we have a new record for nSecondary. |
| 285 | if z.worstCase.nSecondary < nSecondary { |
| 286 | z.worstCase.nSecondary = nSecondary |
| 287 | z.worstCase.nAssignedCodes = z.currCase.nAssignedCodes |
| 288 | z.worstCase.nAssignedCodes[codeLen] = nCodes |
| 289 | } |
| 290 | |
| 291 | } else { // nCodes < nSyms. |
| 292 | codeLen1 := codeLen + 1 |
| 293 | // Brute force assigning n codes of this codeLen. The other (nCodes - |
| 294 | // n) codes will have longer codes. |
| 295 | for n := uint32(0); n < nCodes; n++ { |
| 296 | nCodes1 := (nCodes - n) * 2 |
| 297 | nSyms1 := nSyms - n |
| 298 | |
| 299 | if nCodes1 > nSyms1 { |
| 300 | // Infeasible: there will be unassigned codes. |
| 301 | |
| 302 | } else if z.ftab[codeLen1][nCodes1][nSyms1] { |
| 303 | nSecondary1 := nSecondary |
| 304 | nRemain1 := nRemain * 2 |
| 305 | |
| 306 | if nRemain1 > 0 { |
| 307 | // We have an incomplete secondary table, which has been |
| 308 | // partially accounted for: nSecondary has previously been |
| 309 | // incremented by X, where X equals (1 << (codeLen - |
| 310 | // z.primLen)). |
| 311 | // |
| 312 | // As we recursively call visit, with codeLen increased by |
| 313 | // 1, then we need to double the accounted size of that |
| 314 | // secondary table to 2*X. Since X out of that 2*X has |
| 315 | // already been added, we need to increment nSecondary1 by |
| 316 | // just the difference, which is also X. |
| 317 | nSecondary1 += 1 << (codeLen - z.primLen) |
| 318 | } |
| 319 | |
| 320 | z.currCase.nAssignedCodes[codeLen] = n |
| 321 | z.visit(codeLen1, nCodes1, nSyms1, nSecondary1, nRemain1) |
| 322 | z.currCase.nAssignedCodes[codeLen] = 0 |
| 323 | } |
| 324 | |
| 325 | // Open a new secondary table if necessary: one whose size is (1 << |
| 326 | // (codeLen - z.primLen)). |
| 327 | if nRemain == 0 { |
| 328 | nRemain = 1 << (codeLen - z.primLen) |
| 329 | nSecondary += nRemain |
| 330 | } |
| 331 | |
| 332 | // Assign one of the incomplete secondary table's entries. |
| 333 | nRemain-- |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | // alreadyVisited returns whether z has already visited the (nSecondary, |
| 339 | // nRemain) pair. As a side effect, it also marks that pair as visited. |
| 340 | func (z *state) alreadyVisited(codeLen uint32, nCodes uint32, nSyms uint32, nSecondary uint32, nRemain uint32) bool { |
| 341 | // Use the zig re-numbering to minimize the memory requirements for the |
| 342 | // visited slice. There's an additional 8-fold memory reduction by using 1 |
| 343 | // bit per element, not 1 bool (1 byte) per element. |
| 344 | i := zig(nSecondary/8, nRemain) |
| 345 | mask := uint8(1) << (nSecondary & 7) |
| 346 | |
| 347 | visited := z.visited[codeLen][nCodes][nSyms] |
| 348 | if uint64(i) < uint64(len(visited)) { |
| 349 | x := visited[i] |
| 350 | visited[i] = x | mask |
| 351 | return x&mask != 0 |
| 352 | } |
| 353 | |
| 354 | oldDone := visited |
| 355 | visited = make(bitVector, i+1) |
| 356 | copy(visited, oldDone) |
| 357 | visited[i] = mask |
| 358 | |
| 359 | z.visited[codeLen][nCodes][nSyms] = visited |
| 360 | return false |
| 361 | } |
| 362 | |
| 363 | type bitVector []uint8 |
| 364 | |
| 365 | // zig packs a pair of non-negative integers (i, j) into a single unique |
| 366 | // non-negative integer, in a zig-zagging pattern: |
| 367 | // |
Nigel Tao | be1f605 | 2022-08-21 21:41:06 +1000 | [diff] [blame] | 368 | // . i=0 i=1 i=2 i=3 i=4 etc |
Nigel Tao | ff7bffc | 2019-10-13 23:23:48 +1100 | [diff] [blame] | 369 | // j=0: 0 1 3 6 10 ... |
| 370 | // j=1: 2 4 7 11 16 ... |
| 371 | // j=2: 5 8 12 17 23 ... |
| 372 | // j=3: 9 13 18 24 31 ... |
| 373 | // j=4: 14 19 25 32 40 ... |
| 374 | // etc: ... ... ... ... ... ... |
| 375 | // |
| 376 | // The closed from expression relies on the fact that the sum (0 + 1 + ... + n) |
| 377 | // equals ((n * (n + 1)) / 2). |
| 378 | // |
| 379 | // This lets us minimize the memory requirements of a triangular array: one for |
| 380 | // which a[i][j] is only accessed when ((i+j) < someBound). |
| 381 | func zig(i uint32, j uint32) uint32 { |
| 382 | if (i > 0x4000) || (j > 0x4000) { |
| 383 | panic("overflow") |
| 384 | } |
| 385 | n := i + j |
| 386 | return ((n * (n + 1)) / 2) + j |
| 387 | } |
| 388 | |
| 389 | // nAssignedCodes[i] holds the number of codes of length i. |
| 390 | type nAssignedCodes [maxCodeLen + 1]uint32 |
| 391 | |
| 392 | // fillPrimaryValues fills in feasible a[n] values for n <= primLen. It assumes |
| 393 | // that the caller has already filled in a[n] for n > primLen. |
| 394 | func (a *nAssignedCodes) fillPrimaryValues(primLen uint32) { |
| 395 | // Looping backwards from the maxCodeLen, figure out how many unassigned |
| 396 | // primLen length codes there were. |
| 397 | // |
| 398 | // For example, if there were 10 assigned 15 length codes, then there must |
| 399 | // have been (10 / 2 = 5) unassigned 14 length codes. If there were also 7 |
| 400 | // 14 length codes, then there must have been ((5 + 7) / 2 = 6) unassigned |
| 401 | // 13 length codes. Etcetera. |
| 402 | nUnassignedPrimLen := uint32(0) |
| 403 | for n := uint32(maxCodeLen); n > primLen; n-- { |
| 404 | nUnassignedPrimLen += a[n] |
| 405 | if nUnassignedPrimLen%2 != 0 { |
| 406 | panic("cannot undo Huffman code split") |
| 407 | } |
| 408 | nUnassignedPrimLen /= 2 |
| 409 | } |
| 410 | if nUnassignedPrimLen < 1 { |
| 411 | panic("not enough unassigned primLen length codes") |
| 412 | } |
| 413 | |
| 414 | // Pick a combination of assigning shorter length codes (i.e. setting a[n] |
| 415 | // values) to reach that nUnassignedPrimLen target. There are many possible |
| 416 | // ways to do so, but only one unique way with the additional constraint |
| 417 | // that each value is either 0 or 1: unique because every positive number |
| 418 | // has a unique binary representation. |
| 419 | // |
| 420 | // For example, suppose we need to have 12 unassigned primLen length codes. |
| 421 | // With that additional constraint, the way to do this is: |
| 422 | // |
| 423 | // - 12 unassigned code of length primLen-0. |
| 424 | // - 6 unassigned code of length primLen-1. |
| 425 | // - 3 unassigned code of length primLen-2. |
| 426 | // - 2 unassigned code of length primLen-3. |
| 427 | // - 1 unassigned code of length primLen-4. |
| 428 | // - 1 unassigned code of length primLen-5. |
| 429 | // - 1 unassigned code of length primLen-6. |
| 430 | // - etc |
| 431 | // |
| 432 | // Each left hand column value is either ((2*b)-0) or ((2*b)-1), where b is |
| 433 | // the the number below. Subtracting 0 or 1 means that we assign 0 or 1 |
| 434 | // codes of that row's length: |
| 435 | // |
| 436 | // - 0 assigned code of length primLen-0. |
| 437 | // - 0 assigned code of length primLen-1. |
| 438 | // - 1 assigned code of length primLen-2. |
| 439 | // - 0 assigned code of length primLen-3. |
| 440 | // - 1 assigned code of length primLen-4. |
| 441 | // - 1 assigned code of length primLen-5. |
| 442 | // - 1 assigned code of length primLen-6. |
| 443 | // - etc |
| 444 | // |
| 445 | // Reading upwards, this is the binary string "...1110100", which is the |
| 446 | // two's complement representation of the decimal number -12: the negative |
| 447 | // of nUnassignedPrimLen. |
| 448 | bits := -int32(nUnassignedPrimLen) |
| 449 | for n := primLen; n >= 1; n-- { |
| 450 | a[n] = uint32(bits & 1) |
| 451 | bits >>= 1 |
| 452 | } |
| 453 | if bits != -1 { |
| 454 | panic("did not finish with one unassigned zero-length code") |
| 455 | } |
| 456 | |
Nigel Tao | 2f78804 | 2021-01-23 19:29:19 +1100 | [diff] [blame] | 457 | // As a coherence check of the "negative two's complement" theory, loop |
Nigel Tao | ff7bffc | 2019-10-13 23:23:48 +1100 | [diff] [blame] | 458 | // forwards to calculate the number of unassigned primLen length codes, |
| 459 | // which should match nUnassignedPrimLen. |
| 460 | nUnassigned := uint32(1) |
| 461 | for n := uint32(1); n <= primLen; n++ { |
| 462 | nUnassigned *= 2 |
| 463 | if nUnassigned <= a[n] { |
| 464 | panic("not enough unassigned codes") |
| 465 | } |
| 466 | nUnassigned -= a[n] |
| 467 | } |
| 468 | if nUnassigned != nUnassignedPrimLen { |
| 469 | panic("inconsistent unassigned codes") |
| 470 | } |
| 471 | } |
| 472 | |
| 473 | // feasibleTable[codeLen][nCodes][nSyms] is whether it is feasible to assign |
| 474 | // Huffman codes (of length at least codeLen) to nSyms symbols, given nCodes |
| 475 | // unassigned codes of length codeLen. Each unassigned code of length L can be |
| 476 | // split into 2 codes of length L+1, 4 codes of length L+2, etc, up to a |
| 477 | // maximum code length of maxCodeLen. A feasible assignment ends with zero |
| 478 | // unassigned codes, no more and no less. |
| 479 | type feasibleTable [maxCodeLen + 1][maxMaxNSyms + 1][maxMaxNSyms + 1]bool |
| 480 | |
| 481 | func (f *feasibleTable) count() (ret uint64) { |
| 482 | for i := range *f { |
| 483 | for j := range f[i] { |
| 484 | for k := range f[i][j] { |
| 485 | if f[i][j][k] { |
| 486 | ret++ |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | } |
| 491 | return ret |
| 492 | } |
| 493 | |
| 494 | func (f *feasibleTable) build(maxNSyms uint32) { |
| 495 | for nSyms := uint32(2); nSyms <= maxNSyms; nSyms++ { |
| 496 | if f.buildMore(1, 2, nSyms) != bmResultOK { |
| 497 | panic("infeasible [codeLen][nCodes][nSyms] combination") |
| 498 | } |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | const ( |
| 503 | bmResultOK = 0 |
| 504 | bmResultUseFewerCodes = 1 |
| 505 | bmResultUseMoreCodes = 2 |
| 506 | bmResultUnsatisfiable = 3 |
| 507 | ) |
| 508 | |
| 509 | func (f *feasibleTable) buildMore(codeLen uint32, nCodes uint32, nSyms uint32) uint32 { |
| 510 | if nCodes == nSyms { |
| 511 | if nCodes%2 == 1 { |
| 512 | panic("odd nCodes declared feasible") |
| 513 | } |
| 514 | // This is trivial: assign every symbol a code of length codeLen. |
| 515 | f[codeLen][nCodes][nSyms] = true |
| 516 | return bmResultOK |
| 517 | } else if nCodes > nSyms { |
| 518 | // Infeasible: there will be unassigned codes. |
| 519 | return bmResultUseMoreCodes |
| 520 | } |
| 521 | // From here onwards, we have (nCodes < nSyms). |
| 522 | |
| 523 | if (nCodes << (maxCodeLen - codeLen)) < nSyms { |
| 524 | // Infeasible: there will be unassigned symbols, even if we extend the |
| 525 | // codes out to maxCodeLen. |
| 526 | return bmResultUseFewerCodes |
| 527 | } |
| 528 | |
| 529 | // If we've already visited this [codeLen][nCodes][nSyms] combination, |
| 530 | // there's no need to re-do the computation. |
| 531 | if f[codeLen][nCodes][nSyms] { |
| 532 | return bmResultOK |
| 533 | } |
| 534 | |
| 535 | // Try assigning n out of the nCodes codes 1-to-1 to a symbol, remembering |
| 536 | // that we have checked above that (nCodes < nSyms). The remaining (nCodes |
| 537 | // - n) codes are lengthened by 1, doubling the number of them, and try to |
| 538 | // assign those longer codes to the remaining (nSyms - n) symbols. |
| 539 | ok := false |
| 540 | codeLen1 := codeLen + 1 |
| 541 | for n := uint32(0); n < nCodes; n++ { |
| 542 | nCodes1 := (nCodes - n) * 2 |
| 543 | nSyms1 := nSyms - n |
| 544 | |
| 545 | if x := f.buildMore(codeLen1, nCodes1, nSyms1); x == bmResultOK { |
| 546 | ok = true |
| 547 | } else if x == bmResultUseFewerCodes { |
| 548 | // This value of n didn't succeed, but also any larger n also won't |
| 549 | // succeed, so we can break the loop early. |
| 550 | break |
| 551 | } |
| 552 | } |
| 553 | |
| 554 | if !ok { |
| 555 | return bmResultUnsatisfiable |
| 556 | } |
| 557 | if nCodes%2 == 1 { |
| 558 | panic("odd nCodes declared feasible") |
| 559 | } |
| 560 | f[codeLen][nCodes][nSyms] = true |
| 561 | return bmResultOK |
| 562 | } |