Nigel Tao | 79a9455 | 2017-11-30 16:37:20 +1100 | [diff] [blame] | 1 | // Copyright 2017 The Wuffs Authors. |
Nigel Tao | d4372cb | 2017-10-12 11:17:41 +1100 | [diff] [blame] | 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
Nigel Tao | 90e49ca | 2017-06-08 17:37:04 +1000 | [diff] [blame] | 14 | |
Nigel Tao | 045f180 | 2019-02-23 17:06:24 +1100 | [diff] [blame] | 15 | pub status "#bad code" |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 16 | pub status "#truncated input" |
Nigel Tao | 3863427 | 2017-06-09 12:26:00 +1000 | [diff] [blame] | 17 | |
Nigel Tao | 045f180 | 2019-02-23 17:06:24 +1100 | [diff] [blame] | 18 | pri status "#internal error: inconsistent I/O" |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 19 | |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 20 | // TODO: move bulk data buffers like decoder.suffixes or decoder.output into |
Nigel Tao | d9c3692 | 2019-02-03 15:32:40 +1100 | [diff] [blame] | 21 | // the workbuf? The first attempt at this was a performance regression for |
| 22 | // decoding all but the smallest GIFs. See these git commits for numbers: |
| 23 | // - 49627b4 Flatten the lzw.decoder.suffixes array |
| 24 | // - f877fb2 Use the workbuf instead of lzw.decoder.suffixes |
| 25 | // - 85be5b9 Delete the obsolete lzw.decoder.suffixes array |
| 26 | // and the roll back has combined numbers: |
| 27 | // - 3056a84 Roll back 3 recent lzw.decoder.suffixes commits |
Nigel Tao | 532e18c | 2020-04-14 14:38:07 +1000 | [diff] [blame] | 28 | pub const DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE : base.u64 = 0 |
Nigel Tao | be2d96b | 2019-02-02 15:51:04 +1100 | [diff] [blame] | 29 | |
Nigel Tao | 92a5bfd | 2020-01-11 22:25:31 +1100 | [diff] [blame] | 30 | pub struct decoder? implements base.io_transformer( |
Nigel Tao | d41430c | 2019-05-26 15:51:46 +1000 | [diff] [blame] | 31 | // set_literal_width_arg is 1 plus the saved argument passed to |
| 32 | // set_literal_width. This is assigned to the literal_width field at the |
Nigel Tao | 5f8d2da | 2020-01-10 22:06:50 +1100 | [diff] [blame] | 33 | // start of transform_io. During that method, calling set_literal_width |
Nigel Tao | d41430c | 2019-05-26 15:51:46 +1000 | [diff] [blame] | 34 | // will change set_literal_width_arg but not literal_width. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 35 | set_literal_width_arg : base.u32[..= 9], |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 36 | |
| 37 | // read_from state that does not change during a decode call. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 38 | literal_width : base.u32[..= 8], |
| 39 | clear_code : base.u32[..= 256], |
| 40 | end_code : base.u32[..= 257], |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 41 | |
| 42 | // read_from state that does change during a decode call. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 43 | save_code : base.u32[..= 4096], |
| 44 | prev_code : base.u32[..= 4095], |
| 45 | width : base.u32[..= 12], |
| 46 | bits : base.u32, |
| 47 | n_bits : base.u32[..= 31], |
| 48 | output_ri : base.u32[..= 8191], |
| 49 | output_wi : base.u32[..= 8191], |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 50 | |
| 51 | // read_from return value. The read_from method effectively returns a |
| 52 | // base.u32 to show how decode should continue after calling write_to. That |
| 53 | // value needs to be saved across write_to's possible suspension, so we |
| 54 | // might as well save it explicitly as a decoder field. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 55 | read_from_return_value : base.u32, |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 56 | |
Nigel Tao | 6974c4d | 2019-02-23 13:13:26 +1100 | [diff] [blame] | 57 | // read_from per-code state. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 58 | prefixes : array[4096] base.u16[..= 4095], |
Nigel Tao | 6974c4d | 2019-02-23 13:13:26 +1100 | [diff] [blame] | 59 | |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 60 | util : base.utility, |
Nigel Tao | 438fc10 | 2019-02-10 21:36:13 +1100 | [diff] [blame] | 61 | )( |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 62 | // read_from per-code state. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 63 | suffixes : array[4096] array[8] base.u8, |
Nigel Tao | 479326c | 2018-10-21 16:07:36 +1100 | [diff] [blame] | 64 | // lm1s is the "length minus 1"s of the values for the implicit key-value |
| 65 | // table in this decoder. See std/lzw/README.md for more detail. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 66 | lm1s : array[4096] base.u16, |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 67 | |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 68 | // output[output_ri:output_wi] is the buffered output, connecting read_from |
| 69 | // with write_to and flush. |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 70 | output : array[8192 + 7] base.u8, |
Nigel Tao | b640dac | 2017-05-05 18:07:09 +1000 | [diff] [blame] | 71 | ) |
| 72 | |
Nigel Tao | 4bc42db | 2020-04-12 20:53:54 +1000 | [diff] [blame] | 73 | pub func decoder.set_quirk_enabled!(quirk: base.u32, enabled: base.bool) { |
| 74 | } |
| 75 | |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 76 | pub func decoder.set_literal_width!(lw: base.u32[..= 8]) { |
Nigel Tao | d41430c | 2019-05-26 15:51:46 +1000 | [diff] [blame] | 77 | this.set_literal_width_arg = args.lw + 1 |
Nigel Tao | 019e83f | 2017-06-11 13:58:12 +1000 | [diff] [blame] | 78 | } |
| 79 | |
Nigel Tao | b9cbce9 | 2019-02-02 16:34:40 +1100 | [diff] [blame] | 80 | pub func decoder.workbuf_len() base.range_ii_u64 { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 81 | return this.util.make_range_ii_u64(min_incl: 0, max_incl: 0) |
Nigel Tao | b9cbce9 | 2019-02-02 16:34:40 +1100 | [diff] [blame] | 82 | } |
| 83 | |
Nigel Tao | 5f8d2da | 2020-01-10 22:06:50 +1100 | [diff] [blame] | 84 | pub func decoder.transform_io?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) { |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 85 | var i : base.u32[..= 8191] |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 86 | |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 87 | // Initialize read_from state. |
| 88 | this.literal_width = 8 |
Nigel Tao | 5bea867 | 2019-05-12 19:18:30 +1000 | [diff] [blame] | 89 | if this.set_literal_width_arg > 0 { |
Nigel Tao | d41430c | 2019-05-26 15:51:46 +1000 | [diff] [blame] | 90 | this.literal_width = this.set_literal_width_arg - 1 |
Nigel Tao | 0e498cb | 2018-05-29 09:07:23 +1000 | [diff] [blame] | 91 | } |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 92 | this.clear_code = (1 as base.u32) << this.literal_width |
| 93 | this.end_code = this.clear_code + 1 |
| 94 | this.save_code = this.end_code |
| 95 | this.prev_code = this.end_code |
| 96 | this.width = this.literal_width + 1 |
| 97 | this.bits = 0 |
| 98 | this.n_bits = 0 |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 99 | this.output_ri = 0 |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 100 | this.output_wi = 0 |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 101 | i = 0 |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 102 | while i < this.clear_code { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 103 | assert i < 256 via "a < b: a < c; c <= b"(c: this.clear_code) |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 104 | this.lm1s[i] = 0 |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 105 | this.suffixes[i][0] = i as base.u8 |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 106 | i += 1 |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 107 | } endwhile |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 108 | |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 109 | while true { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 110 | this.read_from!(src: args.src) |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 111 | |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 112 | if this.output_wi > 0 { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 113 | this.write_to?(dst: args.dst) |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | if this.read_from_return_value == 0 { |
| 117 | break |
| 118 | } else if this.read_from_return_value == 1 { |
| 119 | continue |
| 120 | } else if this.read_from_return_value == 2 { |
Nigel Tao | 931fdce | 2019-02-23 17:15:57 +1100 | [diff] [blame] | 121 | yield? base."$short read" |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 122 | } else if this.read_from_return_value == 3 { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 123 | return "#truncated input" |
| 124 | } else if this.read_from_return_value == 4 { |
Nigel Tao | 045f180 | 2019-02-23 17:06:24 +1100 | [diff] [blame] | 125 | return "#bad code" |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 126 | } else { |
Nigel Tao | 045f180 | 2019-02-23 17:06:24 +1100 | [diff] [blame] | 127 | return "#internal error: inconsistent I/O" |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 128 | } |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 129 | } endwhile |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 130 | } |
| 131 | |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 132 | pri func decoder.read_from!(src: base.io_reader) { |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 133 | var clear_code : base.u32[..= 256] |
| 134 | var end_code : base.u32[..= 257] |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 135 | |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 136 | var save_code : base.u32[..= 4096] |
| 137 | var prev_code : base.u32[..= 4095] |
| 138 | var width : base.u32[..= 12] |
| 139 | var bits : base.u32 |
| 140 | var n_bits : base.u32[..= 31] |
| 141 | var output_wi : base.u32[..= 8191] |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 142 | |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 143 | var code : base.u32[..= 4095] |
| 144 | var c : base.u32[..= 4095] |
| 145 | var o : base.u32[..= 8191] |
| 146 | var steps : base.u32 |
| 147 | var first_byte : base.u8 |
| 148 | var lm1_b : base.u16[..= 4095] |
| 149 | var lm1_a : base.u16[..= 4095] |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 150 | |
| 151 | clear_code = this.clear_code |
| 152 | end_code = this.end_code |
| 153 | |
| 154 | save_code = this.save_code |
| 155 | prev_code = this.prev_code |
| 156 | width = this.width |
| 157 | bits = this.bits |
| 158 | n_bits = this.n_bits |
| 159 | output_wi = this.output_wi |
Nigel Tao | 05a217e | 2017-05-18 15:15:58 +1000 | [diff] [blame] | 160 | |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 161 | while true { |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 162 | if n_bits < width { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 163 | assert n_bits < 12 via "a < b: a < c; c <= b"(c: width) |
Nigel Tao | be8542e | 2020-08-13 23:26:08 +1000 | [diff] [blame] | 164 | if args.src.length() >= 4 { |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 165 | // Read 4 bytes, using the "Variant 4" technique of |
| 166 | // https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/ |
Nigel Tao | 2f4c931 | 2018-12-23 09:12:56 +1100 | [diff] [blame] | 167 | bits |= args.src.peek_u32le() ~mod<< n_bits |
Nigel Tao | 8b70ad0 | 2020-05-27 23:28:44 +1000 | [diff] [blame] | 168 | args.src.skip_u32_fast!(actual: (31 - n_bits) >> 3, worst_case: 3) |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 169 | n_bits |= 24 |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 170 | assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12) |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 171 | assert n_bits >= width via "a >= b: b <= a"() |
Nigel Tao | be8542e | 2020-08-13 23:26:08 +1000 | [diff] [blame] | 172 | } else if args.src.length() <= 0 { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 173 | if args.src.is_closed() { |
| 174 | this.read_from_return_value = 3 |
| 175 | } else { |
| 176 | this.read_from_return_value = 2 |
| 177 | } |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 178 | break |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 179 | } else { |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 180 | bits |= args.src.peek_u8_as_u32() << n_bits |
Nigel Tao | 8b70ad0 | 2020-05-27 23:28:44 +1000 | [diff] [blame] | 181 | args.src.skip_u32_fast!(actual: 1, worst_case: 1) |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 182 | n_bits += 8 |
| 183 | if n_bits >= width { |
| 184 | // No-op. |
Nigel Tao | be8542e | 2020-08-13 23:26:08 +1000 | [diff] [blame] | 185 | } else if args.src.length() <= 0 { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 186 | if args.src.is_closed() { |
| 187 | this.read_from_return_value = 3 |
| 188 | } else { |
| 189 | this.read_from_return_value = 2 |
| 190 | } |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 191 | break |
| 192 | } else { |
Nigel Tao | 2f4c931 | 2018-12-23 09:12:56 +1100 | [diff] [blame] | 193 | bits |= args.src.peek_u8_as_u32() << n_bits |
Nigel Tao | 8b70ad0 | 2020-05-27 23:28:44 +1000 | [diff] [blame] | 194 | args.src.skip_u32_fast!(actual: 1, worst_case: 1) |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 195 | n_bits += 8 |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 196 | assert width <= n_bits via "a <= b: a <= c; c <= b"(c: 12) |
Nigel Tao | 7405307 | 2018-12-26 16:52:55 +1100 | [diff] [blame] | 197 | assert n_bits >= width via "a >= b: b <= a"() |
| 198 | |
| 199 | // This if condition is always false, but for some unknown |
| 200 | // reason, removing it worsens the benchmarks slightly. |
| 201 | if n_bits < width { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 202 | this.read_from_return_value = 5 |
Nigel Tao | 6945bdc | 2018-10-20 07:22:23 +1100 | [diff] [blame] | 203 | break |
| 204 | } |
Nigel Tao | 24e7ee0 | 2018-10-14 22:35:42 +1100 | [diff] [blame] | 205 | } |
Nigel Tao | ff92d22 | 2018-10-14 22:14:02 +1100 | [diff] [blame] | 206 | } |
Nigel Tao | bcf6a7e | 2017-06-01 14:55:06 +1000 | [diff] [blame] | 207 | } |
Nigel Tao | ff92d22 | 2018-10-14 22:14:02 +1100 | [diff] [blame] | 208 | |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 209 | code = bits.low_bits(n: width) |
Nigel Tao | 5a0aea6 | 2017-06-02 10:57:56 +1000 | [diff] [blame] | 210 | bits >>= width |
Nigel Tao | 80a39bf | 2017-06-02 11:39:51 +1000 | [diff] [blame] | 211 | n_bits -= width |
Nigel Tao | bcf6a7e | 2017-06-01 14:55:06 +1000 | [diff] [blame] | 212 | |
Nigel Tao | a817526 | 2017-06-08 15:49:52 +1000 | [diff] [blame] | 213 | if code < clear_code { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 214 | assert code < 256 via "a < b: a < c; c <= b"(c: clear_code) |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 215 | this.output[output_wi] = code as base.u8 |
| 216 | output_wi = (output_wi + 1) & 8191 |
Nigel Tao | 3a46d6c | 2017-06-30 15:43:40 +1000 | [diff] [blame] | 217 | if save_code <= 4095 { |
Nigel Tao | 92959fb | 2019-02-16 15:40:02 +1100 | [diff] [blame] | 218 | lm1_a = (this.lm1s[prev_code] ~mod+ 1) & 4095 |
Nigel Tao | 479326c | 2018-10-21 16:07:36 +1100 | [diff] [blame] | 219 | this.lm1s[save_code] = lm1_a |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 220 | |
Nigel Tao | ef7aa0d | 2019-03-02 14:49:02 +1100 | [diff] [blame] | 221 | if (lm1_a % 8) <> 0 { |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 222 | this.prefixes[save_code] = this.prefixes[prev_code] |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 223 | this.suffixes[save_code] = this.suffixes[prev_code] |
| 224 | this.suffixes[save_code][lm1_a % 8] = code as base.u8 |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 225 | } else { |
| 226 | this.prefixes[save_code] = prev_code as base.u16 |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 227 | this.suffixes[save_code][0] = code as base.u8 |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 228 | } |
| 229 | |
Nigel Tao | 91bf555 | 2018-06-30 08:33:23 +1000 | [diff] [blame] | 230 | save_code += 1 |
Nigel Tao | abeff31 | 2019-01-12 09:15:30 +1100 | [diff] [blame] | 231 | if width < 12 { |
| 232 | width += 1 & (save_code >> width) |
Nigel Tao | 91bf555 | 2018-06-30 08:33:23 +1000 | [diff] [blame] | 233 | } |
Nigel Tao | 92b5663 | 2018-06-30 08:47:45 +1000 | [diff] [blame] | 234 | prev_code = code |
Nigel Tao | 587bc2a | 2017-06-09 18:00:33 +1000 | [diff] [blame] | 235 | } |
Nigel Tao | 711adf5 | 2017-06-09 15:21:03 +1000 | [diff] [blame] | 236 | |
Nigel Tao | 9cd37ad | 2018-06-30 08:40:24 +1000 | [diff] [blame] | 237 | } else if code <= end_code { |
| 238 | if code == end_code { |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 239 | this.read_from_return_value = 0 |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 240 | break |
Nigel Tao | 9cd37ad | 2018-06-30 08:40:24 +1000 | [diff] [blame] | 241 | } |
Nigel Tao | 0b034d7 | 2017-06-09 12:34:32 +1000 | [diff] [blame] | 242 | save_code = end_code |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 243 | prev_code = end_code |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 244 | width = this.literal_width + 1 |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 245 | |
Nigel Tao | 0b034d7 | 2017-06-09 12:34:32 +1000 | [diff] [blame] | 246 | } else if code <= save_code { |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 247 | c = code |
Nigel Tao | 3a46d6c | 2017-06-30 15:43:40 +1000 | [diff] [blame] | 248 | if code == save_code { |
Nigel Tao | 8d6694a | 2017-06-10 15:23:43 +1000 | [diff] [blame] | 249 | c = prev_code |
Nigel Tao | 98a0792 | 2017-06-10 14:11:24 +1000 | [diff] [blame] | 250 | } |
| 251 | |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 252 | // Letting old_wi and new_wi denote the values of output_wi before |
| 253 | // and after these two lines of code, the decoded bytes will be |
| 254 | // written to output[old_wi:new_wi]. They will be written |
| 255 | // back-to-front, 8 bytes at a time, starting by writing |
| 256 | // output[o:o + 8], which will contain output[new_wi - 1]. |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 257 | // |
| 258 | // In the special case that code == save_code, the decoded bytes |
| 259 | // contain an extra copy (at the end) of the first byte, and will |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 260 | // be written to output[old_wi:new_wi + 1]. |
Nigel Tao | b40671e | 2020-02-08 18:21:29 +1100 | [diff] [blame] | 261 | o = (output_wi + ((this.lm1s[c] as base.u32) & 0xFFFF_FFF8)) & 8191 |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 262 | output_wi = (output_wi + 1 + (this.lm1s[c] as base.u32)) & 8191 |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 263 | |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 264 | steps = (this.lm1s[c] as base.u32) >> 3 |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 265 | while true { |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 266 | assert o <= (o + 8) via "a <= (a + b): 0 <= b"(b: 8) |
Nigel Tao | f60723f | 2018-10-21 18:03:54 +1100 | [diff] [blame] | 267 | |
| 268 | // The final "8" is redundant semantically, but helps the |
| 269 | // wuffs-c code generator recognize that both slices have the |
| 270 | // same constant length, and hence produce efficient C code. |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 271 | this.output[o .. o + 8].copy_from_slice!(s: this.suffixes[c][.. 8]) |
Nigel Tao | f60723f | 2018-10-21 18:03:54 +1100 | [diff] [blame] | 272 | |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 273 | if steps <= 0 { |
| 274 | break |
| 275 | } |
| 276 | steps -= 1 |
| 277 | |
| 278 | // This line is essentially "o -= 8". The "& 8191" is a no-op |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 279 | // in practice, but is necessary for the overflow checker. |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 280 | o = (o ~mod- 8) & 8191 |
Nigel Tao | 6974c4d | 2019-02-23 13:13:26 +1100 | [diff] [blame] | 281 | c = this.prefixes[c] as base.u32 |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 282 | } endwhile |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 283 | first_byte = this.suffixes[c][0] |
Nigel Tao | 8d6694a | 2017-06-10 15:23:43 +1000 | [diff] [blame] | 284 | |
Nigel Tao | 3a46d6c | 2017-06-30 15:43:40 +1000 | [diff] [blame] | 285 | if code == save_code { |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 286 | this.output[output_wi] = first_byte |
| 287 | output_wi = (output_wi + 1) & 8191 |
Nigel Tao | 66f7120 | 2017-10-13 10:30:22 +1100 | [diff] [blame] | 288 | } |
Nigel Tao | 8d6694a | 2017-06-10 15:23:43 +1000 | [diff] [blame] | 289 | |
Nigel Tao | 3a46d6c | 2017-06-30 15:43:40 +1000 | [diff] [blame] | 290 | if save_code <= 4095 { |
Nigel Tao | 92959fb | 2019-02-16 15:40:02 +1100 | [diff] [blame] | 291 | lm1_b = (this.lm1s[prev_code] ~mod+ 1) & 4095 |
Nigel Tao | 479326c | 2018-10-21 16:07:36 +1100 | [diff] [blame] | 292 | this.lm1s[save_code] = lm1_b |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 293 | |
Nigel Tao | ef7aa0d | 2019-03-02 14:49:02 +1100 | [diff] [blame] | 294 | if (lm1_b % 8) <> 0 { |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 295 | this.prefixes[save_code] = this.prefixes[prev_code] |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 296 | this.suffixes[save_code] = this.suffixes[prev_code] |
| 297 | this.suffixes[save_code][lm1_b % 8] = first_byte |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 298 | } else { |
| 299 | this.prefixes[save_code] = prev_code as base.u16 |
Nigel Tao | 3056a84 | 2019-02-03 15:25:53 +1100 | [diff] [blame] | 300 | this.suffixes[save_code][0] = first_byte as base.u8 |
Nigel Tao | 3338d6d | 2018-10-21 14:48:40 +1100 | [diff] [blame] | 301 | } |
| 302 | |
Nigel Tao | 91bf555 | 2018-06-30 08:33:23 +1000 | [diff] [blame] | 303 | save_code += 1 |
Nigel Tao | abeff31 | 2019-01-12 09:15:30 +1100 | [diff] [blame] | 304 | if width < 12 { |
| 305 | width += 1 & (save_code >> width) |
Nigel Tao | 91bf555 | 2018-06-30 08:33:23 +1000 | [diff] [blame] | 306 | } |
Nigel Tao | 92b5663 | 2018-06-30 08:47:45 +1000 | [diff] [blame] | 307 | prev_code = code |
Nigel Tao | 8d6694a | 2017-06-10 15:23:43 +1000 | [diff] [blame] | 308 | } |
Nigel Tao | 711adf5 | 2017-06-09 15:21:03 +1000 | [diff] [blame] | 309 | |
Nigel Tao | 3863427 | 2017-06-09 12:26:00 +1000 | [diff] [blame] | 310 | } else { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 311 | this.read_from_return_value = 4 |
Nigel Tao | 179f7af | 2018-12-22 14:45:42 +1100 | [diff] [blame] | 312 | break |
Nigel Tao | 84bf437 | 2017-06-06 19:01:48 +1000 | [diff] [blame] | 313 | } |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 314 | |
| 315 | // Flush the output if it could be too full to contain the entire |
| 316 | // decoding of the next code. The longest possible decoding is slightly |
| 317 | // less than 4096 and output's length is 8192, so a conservative |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 318 | // threshold is ensuring that output_wi <= 4095. |
| 319 | if output_wi > 4095 { |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 320 | this.read_from_return_value = 1 |
| 321 | break |
| 322 | } |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 323 | } endwhile |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 324 | |
| 325 | // Rewind args.src, if we're not in "$short read" and we've read too many |
| 326 | // bits. |
Nigel Tao | ef7aa0d | 2019-03-02 14:49:02 +1100 | [diff] [blame] | 327 | if this.read_from_return_value <> 2 { |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 328 | while n_bits >= 8 { |
| 329 | n_bits -= 8 |
| 330 | if args.src.can_undo_byte() { |
| 331 | args.src.undo_byte!() |
| 332 | } else { |
Nigel Tao | 48e69da | 2023-01-31 14:11:06 +1100 | [diff] [blame] | 333 | this.read_from_return_value = 5 |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 334 | break |
Nigel Tao | ff92d22 | 2018-10-14 22:14:02 +1100 | [diff] [blame] | 335 | } |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 336 | } endwhile |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 337 | } |
| 338 | |
Nigel Tao | c2826e8 | 2018-12-26 11:01:08 +1100 | [diff] [blame] | 339 | this.save_code = save_code |
| 340 | this.prev_code = prev_code |
| 341 | this.width = width |
| 342 | this.bits = bits |
| 343 | this.n_bits = n_bits |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 344 | this.output_wi = output_wi |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 345 | } |
| 346 | |
Nigel Tao | 1520a1f | 2019-10-12 21:49:04 +1100 | [diff] [blame] | 347 | pri func decoder.write_to?(dst: base.io_writer) { |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 348 | var s : slice base.u8 |
| 349 | var n : base.u64 |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 350 | |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 351 | while this.output_wi > 0 { |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 352 | if this.output_ri > this.output_wi { |
Nigel Tao | 045f180 | 2019-02-23 17:06:24 +1100 | [diff] [blame] | 353 | return "#internal error: inconsistent I/O" |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 354 | } |
Nigel Tao | a9ea63f | 2019-10-12 21:14:18 +1100 | [diff] [blame] | 355 | s = this.output[this.output_ri .. this.output_wi] |
Nigel Tao | 5b4e364 | 2019-10-12 21:21:08 +1100 | [diff] [blame] | 356 | n = args.dst.copy_from_slice!(s: s) |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 357 | if n == s.length() { |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 358 | this.output_ri = 0 |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 359 | this.output_wi = 0 |
Nigel Tao | 762b320 | 2018-11-24 15:09:28 +1100 | [diff] [blame] | 360 | return ok |
Nigel Tao | 4e27b12 | 2018-10-14 17:53:20 +1100 | [diff] [blame] | 361 | } |
Nigel Tao | b40671e | 2020-02-08 18:21:29 +1100 | [diff] [blame] | 362 | this.output_ri = (this.output_ri ~mod+ ((n & 0xFFFF_FFFF) as base.u32)) & 8191 |
Nigel Tao | 931fdce | 2019-02-23 17:15:57 +1100 | [diff] [blame] | 363 | yield? base."$short write" |
Nigel Tao | 58351d4 | 2020-03-25 21:38:31 +1100 | [diff] [blame] | 364 | } endwhile |
Nigel Tao | b640dac | 2017-05-05 18:07:09 +1000 | [diff] [blame] | 365 | } |
Nigel Tao | 622a76d | 2018-12-16 09:57:15 +1100 | [diff] [blame] | 366 | |
Nigel Tao | 9311f7a | 2018-12-16 10:23:53 +1100 | [diff] [blame] | 367 | pub func decoder.flush!() slice base.u8 { |
Nigel Tao | a21ad25 | 2019-10-12 21:55:13 +1100 | [diff] [blame] | 368 | var s : slice base.u8 |
Nigel Tao | 9bcdd1e | 2018-12-26 19:35:28 +1100 | [diff] [blame] | 369 | |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 370 | if this.output_ri <= this.output_wi { |
Nigel Tao | a9ea63f | 2019-10-12 21:14:18 +1100 | [diff] [blame] | 371 | s = this.output[this.output_ri .. this.output_wi] |
Nigel Tao | 314b120 | 2018-12-29 14:23:27 +1100 | [diff] [blame] | 372 | } |
| 373 | this.output_ri = 0 |
Nigel Tao | 00c5f77 | 2018-12-26 11:15:22 +1100 | [diff] [blame] | 374 | this.output_wi = 0 |
Nigel Tao | 622a76d | 2018-12-16 09:57:15 +1100 | [diff] [blame] | 375 | return s |
| 376 | } |