blob: 93d6ac5ac3df5c00a090e3de7da0c9a0429f203b [file] [log] [blame]
Nigel Taod2075ce2018-04-25 15:26:29 +10001// Copyright 2018 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// This file contains a hand-written C benchmark of different strategies for
16// decoding PNG data.
17//
18// For a PNG image with width W and height H, the H rows can be decompressed
19// one-at-a-time or all-at-once. Roughly speaking, this corresponds to H versus
20// 1 call into the zlib decoder. The former (call it "fragmented dst") requires
21// less scratch-space memory than the latter ("full dst"): 2 * bytes_per_row
22// instead of H * bytes_per row, but the latter can be faster.
23//
24// The zlib-compressed data can be split into multiple IDAT chunks. Similarly,
25// these chunks can be decompressed separately ("fragmented IDAT") or together
26// ("full IDAT"), again providing a memory vs speed trade-off.
27//
28// This program reports the speed of combining the independent frag/full dst
29// and frag/full IDAT techniques.
30//
Nigel Tao95d02ee2019-01-12 12:34:40 +110031// For example, with gcc 7.3 (and -O3) as of January 2019:
Nigel Taod2075ce2018-04-25 15:26:29 +100032//
33// On ../test/data/hat.png (90 × 112 pixels):
34// name time/op relative
Nigel Tao95d02ee2019-01-12 12:34:40 +110035// FragDstFragIDAT/gcc 286µs ± 1% 1.00x
36// FragDstFullIDAT/gcc 286µs ± 0% 1.00x
37// FullDstFragIDAT/gcc 214µs ± 1% 1.34x
38// FullDstFullIDAT/gcc 146µs ± 1% 1.96x
Nigel Taod2075ce2018-04-25 15:26:29 +100039//
Nigel Tao95d02ee2019-01-12 12:34:40 +110040// On ../test/data/hibiscus.regular.png (312 × 442 pixels):
Nigel Taod2075ce2018-04-25 15:26:29 +100041// name time/op relative
Nigel Tao95d02ee2019-01-12 12:34:40 +110042// FragDstFragIDAT/gcc 3.58ms ± 1% 1.00x
43// FragDstFullIDAT/gcc 3.54ms ± 1% 1.01x
44// FullDstFragIDAT/gcc 3.09ms ± 1% 1.16x
45// FullDstFullIDAT/gcc 1.99ms ± 1% 1.80x
Nigel Taod2075ce2018-04-25 15:26:29 +100046//
47// On ../test/data/harvesters.png (1165 × 859 pixels):
48// name time/op relative
Nigel Tao95d02ee2019-01-12 12:34:40 +110049// FragDstFragIDAT/gcc 25.4ms ± 2% 1.00x
50// FragDstFullIDAT/gcc 25.2ms ± 0% 1.01x
51// FullDstFragIDAT/gcc 22.2ms ± 0% 1.14x
52// FullDstFullIDAT/gcc 14.0ms ± 1% 1.81x
Nigel Taod2075ce2018-04-25 15:26:29 +100053
54#include <errno.h>
55#include <inttypes.h>
56#include <stdio.h>
57#include <string.h>
58#include <sys/time.h>
59#include <unistd.h>
60
Nigel Taoa0bafff2018-07-14 08:59:36 +100061// Wuffs ships as a "single file C library" or "header file library" as per
62// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
63//
64// To use that single file as a "foo.c"-like implementation, instead of a
65// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
66// compiling it.
67#define WUFFS_IMPLEMENTATION
68
Jimmy Casey3c50ada2018-07-26 17:12:19 +000069// If building this program in an environment that doesn't easily accommodate
Nigel Taod2075ce2018-04-25 15:26:29 +100070// relative includes, you can use the script/inline-c-relative-includes.go
71// program to generate a stand-alone C file.
Nigel Tao95d02ee2019-01-12 12:34:40 +110072#include "../release/c/wuffs-unsupported-snapshot.c"
Nigel Taod2075ce2018-04-25 15:26:29 +100073
74// The order matters here. Clang also defines "__GNUC__".
75#if defined(__clang__)
76const char* cc = "clang";
77const char* cc_version = __clang_version__;
78#elif defined(__GNUC__)
79const char* cc = "gcc";
80const char* cc_version = __VERSION__;
81#elif defined(_MSC_VER)
82const char* cc = "cl";
83const char* cc_version = "???";
84#else
85const char* cc = "cc";
86const char* cc_version = "???";
87#endif
88
89static inline uint32_t load_u32be(uint8_t* p) {
90 return ((uint32_t)(p[0]) << 24) | ((uint32_t)(p[1]) << 16) |
91 ((uint32_t)(p[2]) << 8) | ((uint32_t)(p[3]) << 0);
92}
93
94// Limit the input PNG image (and therefore its IDAT data) to (64 MiB - 1 byte)
95// compressed, in up to 1024 IDAT chunks, and 256 MiB and 16384 × 16384 pixels
96// uncompressed. This is a limitation of this program (which uses the Wuffs
97// standard library), not a limitation of Wuffs per se.
98#define DST_BUFFER_SIZE (256 * 1024 * 1024)
99#define SRC_BUFFER_SIZE (64 * 1024 * 1024)
100#define MAX_DIMENSION (16384)
101#define MAX_IDAT_CHUNKS (1024)
102
103uint8_t dst_buffer[DST_BUFFER_SIZE] = {0};
104size_t dst_len = 0;
105uint8_t src_buffer[SRC_BUFFER_SIZE] = {0};
106size_t src_len = 0;
107uint8_t idat_buffer[SRC_BUFFER_SIZE] = {0};
108// The n'th IDAT chunk data (where n is a zero-based count) is in
109// idat_buffer[i:j], where i = idat_splits[n+0] and j = idat_splits[n+1].
110size_t idat_splits[MAX_IDAT_CHUNKS + 1] = {0};
111uint32_t num_idat_chunks = 0;
112
113uint32_t width = 0;
114uint32_t height = 0;
115uint64_t bytes_per_pixel = 0;
116uint64_t bytes_per_row = 0;
117uint64_t bytes_per_frame = 0;
118
119const char* read_stdin() {
120 while (src_len < SRC_BUFFER_SIZE) {
121 const int stdin_fd = 0;
122 ssize_t n = read(stdin_fd, src_buffer + src_len, SRC_BUFFER_SIZE - src_len);
123 if (n > 0) {
124 src_len += n;
125 } else if (n == 0) {
126 return NULL;
127 } else if (errno == EINTR) {
128 // No-op.
129 } else {
130 return strerror(errno);
131 }
132 }
133 return "input is too large";
134}
135
136const char* process_png_chunks(uint8_t* p, size_t n) {
137 while (n > 0) {
138 // Process the 8 byte chunk header.
139 if (n < 8) {
140 return "invalid PNG chunk";
141 }
142 uint32_t chunk_len = load_u32be(p + 0);
143 uint32_t chunk_type = load_u32be(p + 4);
144 p += 8;
145 n -= 8;
146
147 // Process the chunk payload.
148 if (n < chunk_len) {
149 return "short PNG chunk data";
150 }
151 switch (chunk_type) {
152 case 0x49484452: // "IHDR"
153 if (chunk_len != 13) {
154 return "invalid PNG IDAT chunk";
155 }
156 width = load_u32be(p + 0);
157 height = load_u32be(p + 4);
158 if ((width == 0) || (height == 0)) {
159 return "image dimensions are too small";
160 }
161 if ((width > MAX_DIMENSION) || (height > MAX_DIMENSION)) {
162 return "image dimensions are too large";
163 }
164 if (p[8] != 8) {
165 return "unsupported PNG bit depth";
166 }
167 if (bytes_per_pixel != 0) {
168 return "duplicate PNG IHDR chunk";
169 }
170 // Process the color type, as per the PNG spec table 11.1.
171 switch (p[9]) {
172 case 0:
173 bytes_per_pixel = 1;
174 break;
175 case 2:
176 bytes_per_pixel = 3;
177 break;
178 case 3:
179 bytes_per_pixel = 1;
180 break;
181 case 4:
182 bytes_per_pixel = 2;
183 break;
184 case 6:
185 bytes_per_pixel = 4;
186 break;
187 default:
188 return "unsupported PNG color type";
189 }
190 if (p[12] != 0) {
191 return "unsupported PNG interlacing";
192 }
193 break;
194
195 case 0x49444154: // "IDAT"
196 if (num_idat_chunks == MAX_IDAT_CHUNKS - 1) {
197 return "too many IDAT chunks";
198 }
199 memcpy(idat_buffer + idat_splits[num_idat_chunks], p, chunk_len);
200 idat_splits[num_idat_chunks + 1] =
201 idat_splits[num_idat_chunks] + chunk_len;
202 num_idat_chunks++;
203 break;
204 }
205 p += chunk_len;
206 n -= chunk_len;
207
208 // Process (and ignore) the 4 byte chunk footer (a checksum).
209 if (n < 4) {
210 return "invalid PNG chunk";
211 }
212 p += 4;
213 n -= 4;
214 }
215 return NULL;
216}
217
218const char* decode_once(bool frag_dst, bool frag_idat) {
Nigel Taoa0bafff2018-07-14 08:59:36 +1000219 wuffs_zlib__decoder dec = ((wuffs_zlib__decoder){});
Nigel Tao95d02ee2019-01-12 12:34:40 +1100220 const char* status =
221 wuffs_zlib__decoder__check_wuffs_version(&dec, sizeof dec, WUFFS_VERSION);
222 if (status) {
223 return status;
224 }
Nigel Taod2075ce2018-04-25 15:26:29 +1000225
Nigel Tao95d02ee2019-01-12 12:34:40 +1100226 wuffs_base__io_buffer dst = ((wuffs_base__io_buffer){
227 .data = ((wuffs_base__slice_u8){
228 .ptr = dst_buffer,
229 .len = bytes_per_frame,
230 }),
231 });
232 wuffs_base__io_buffer idat = ((wuffs_base__io_buffer){
233 .data = ((wuffs_base__slice_u8){
234 .ptr = idat_buffer,
235 .len = SRC_BUFFER_SIZE,
236 }),
237 .meta = ((wuffs_base__io_buffer_meta){
238 .wi = idat_splits[num_idat_chunks],
239 .ri = 0,
240 .pos = 0,
241 .closed = true,
242 }),
243 });
Nigel Tao8827e222018-04-27 20:54:44 +1000244 wuffs_base__io_writer dst_writer = wuffs_base__io_buffer__writer(&dst);
245 wuffs_base__io_reader idat_reader = wuffs_base__io_buffer__reader(&idat);
Nigel Taod2075ce2018-04-25 15:26:29 +1000246
247 uint32_t i = 0; // Number of dst fragments processed, if frag_dst.
248 if (frag_dst) {
Nigel Tao95d02ee2019-01-12 12:34:40 +1100249 dst.data.len = bytes_per_row;
Nigel Taod2075ce2018-04-25 15:26:29 +1000250 }
251
252 uint32_t j = 0; // Number of IDAT fragments processed, if frag_idat.
253 if (frag_idat) {
Nigel Tao95d02ee2019-01-12 12:34:40 +1100254 idat.meta.wi = idat_splits[1];
255 idat.meta.closed = (num_idat_chunks == 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000256 }
257
258 while (true) {
Nigel Tao95d02ee2019-01-12 12:34:40 +1100259 status =
260 wuffs_zlib__decoder__decode_io_writer(&dec, dst_writer, idat_reader);
Nigel Taod2075ce2018-04-25 15:26:29 +1000261
Nigel Tao95d02ee2019-01-12 12:34:40 +1100262 if (!status) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000263 break;
264 }
Nigel Tao95d02ee2019-01-12 12:34:40 +1100265 if ((status == wuffs_base__suspension__short_write) && frag_dst &&
Nigel Taod2075ce2018-04-25 15:26:29 +1000266 (i < height - 1)) {
267 i++;
Nigel Tao95d02ee2019-01-12 12:34:40 +1100268 dst.data.len = bytes_per_row * (i + 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000269 continue;
270 }
Nigel Tao95d02ee2019-01-12 12:34:40 +1100271 if ((status == wuffs_base__suspension__short_read) && frag_idat &&
Nigel Taod2075ce2018-04-25 15:26:29 +1000272 (j < num_idat_chunks - 1)) {
273 j++;
Nigel Tao95d02ee2019-01-12 12:34:40 +1100274 idat.meta.wi = idat_splits[j + 1];
275 idat.meta.closed = (num_idat_chunks == j + 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000276 continue;
277 }
Nigel Tao95d02ee2019-01-12 12:34:40 +1100278 return status;
Nigel Taod2075ce2018-04-25 15:26:29 +1000279 }
280
Nigel Tao95d02ee2019-01-12 12:34:40 +1100281 if (dst.meta.wi != bytes_per_frame) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000282 return "unexpected number of bytes decoded";
283 }
284 return NULL;
285}
286
287const char* decode(bool frag_dst, bool frag_idat) {
288 int reps;
289 if (bytes_per_frame < 100000) {
290 reps = 1000;
291 } else if (bytes_per_frame < 1000000) {
292 reps = 100;
293 } else if (bytes_per_frame < 10000000) {
294 reps = 10;
295 } else {
296 reps = 1;
297 }
298
299 struct timeval bench_start_tv;
300 gettimeofday(&bench_start_tv, NULL);
301
302 int i;
303 for (i = 0; i < reps; i++) {
304 const char* msg = decode_once(frag_dst, frag_idat);
305 if (msg) {
306 return msg;
307 }
308 }
309
310 struct timeval bench_finish_tv;
311 gettimeofday(&bench_finish_tv, NULL);
312 int64_t micros =
313 (int64_t)(bench_finish_tv.tv_sec - bench_start_tv.tv_sec) * 1000000 +
314 (int64_t)(bench_finish_tv.tv_usec - bench_start_tv.tv_usec);
315 uint64_t nanos = 1;
316 if (micros > 0) {
317 nanos = (uint64_t)(micros)*1000;
318 }
319
320 printf("Benchmark%sDst%sIDAT/%s\t%8d\t%8" PRIu64 " ns/op\n",
321 frag_dst ? "Frag" : "Full", //
322 frag_idat ? "Frag" : "Full", //
323 cc, reps, nanos / reps);
324
325 return NULL;
326}
327
328int fail(const char* msg) {
329 const int stderr_fd = 2;
330 write(stderr_fd, msg, strnlen(msg, 4095));
331 write(stderr_fd, "\n", 1);
332 return 1;
333}
334
335int main(int argc, char** argv) {
336 const char* msg = read_stdin();
337 if (msg) {
338 return fail(msg);
339 }
Nigel Tao95d02ee2019-01-12 12:34:40 +1100340 if ((src_len < 8) ||
341 strncmp((const char*)(src_buffer), "\x89PNG\x0D\x0A\x1A\x0A", 8)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000342 return fail("invalid PNG");
343 }
344 msg = process_png_chunks(src_buffer + 8, src_len - 8);
345 if (msg) {
346 return fail(msg);
347 }
348 if (bytes_per_pixel == 0) {
349 return fail("missing PNG IHDR chunk");
350 }
351 if (num_idat_chunks == 0) {
352 return fail("missing PNG IDAT chunk");
353 }
354 // The +1 here is for the per-row filter byte.
355 bytes_per_row = (uint64_t)width * bytes_per_pixel + 1;
356 bytes_per_frame = (uint64_t)height * bytes_per_row;
357 if (bytes_per_frame > DST_BUFFER_SIZE) {
358 return fail("decompressed data is too large");
359 }
360
361 printf("# %s version %s\n#\n", cc, cc_version);
362 printf(
363 "# The output format, including the \"Benchmark\" prefixes, is "
364 "compatible with the\n"
365 "# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install "
366 "it, first\n"
367 "# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n");
368
369 int i;
370 for (i = 0; i < 5; i++) {
371 msg = decode(true, true);
372 if (msg) {
373 return fail(msg);
374 }
375 msg = decode(true, false);
376 if (msg) {
377 return fail(msg);
378 }
379 msg = decode(false, true);
380 if (msg) {
381 return fail(msg);
382 }
383 msg = decode(false, false);
384 if (msg) {
385 return fail(msg);
386 }
387 }
388
389 return 0;
390}