blob: 37b7c1e5f09a410888c31ee4f597343eb6a1deab [file] [log] [blame]
Nigel Taod2075ce2018-04-25 15:26:29 +10001// Copyright 2018 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Nigel Tao670b45b2019-02-02 08:10:08 +110015// ----------------
16
Nigel Taod2075ce2018-04-25 15:26:29 +100017// This file contains a hand-written C benchmark of different strategies for
18// decoding PNG data.
19//
20// For a PNG image with width W and height H, the H rows can be decompressed
21// one-at-a-time or all-at-once. Roughly speaking, this corresponds to H versus
22// 1 call into the zlib decoder. The former (call it "fragmented dst") requires
23// less scratch-space memory than the latter ("full dst"): 2 * bytes_per_row
24// instead of H * bytes_per row, but the latter can be faster.
25//
26// The zlib-compressed data can be split into multiple IDAT chunks. Similarly,
27// these chunks can be decompressed separately ("fragmented IDAT") or together
28// ("full IDAT"), again providing a memory vs speed trade-off.
29//
30// This program reports the speed of combining the independent frag/full dst
31// and frag/full IDAT techniques.
32//
Nigel Tao95d02ee2019-01-12 12:34:40 +110033// For example, with gcc 7.3 (and -O3) as of January 2019:
Nigel Taod2075ce2018-04-25 15:26:29 +100034//
35// On ../test/data/hat.png (90 × 112 pixels):
36// name time/op relative
Nigel Taoc6a48b32019-01-19 10:50:40 +110037// FragDstFragIDAT/gcc 289µs ± 1% 1.00x
38// FragDstFullIDAT/gcc 288µs ± 0% 1.00x
39// FullDstFragIDAT/gcc 149µs ± 1% 1.93x
40// FullDstFullIDAT/gcc 148µs ± 1% 1.95x
Nigel Taod2075ce2018-04-25 15:26:29 +100041//
Nigel Tao95d02ee2019-01-12 12:34:40 +110042// On ../test/data/hibiscus.regular.png (312 × 442 pixels):
Nigel Taod2075ce2018-04-25 15:26:29 +100043// name time/op relative
Nigel Taoc6a48b32019-01-19 10:50:40 +110044// FragDstFragIDAT/gcc 2.49ms ± 0% 1.00x
45// FragDstFullIDAT/gcc 2.49ms ± 0% 1.00x
46// FullDstFragIDAT/gcc 2.08ms ± 0% 1.20x
47// FullDstFullIDAT/gcc 2.02ms ± 1% 1.23x
Nigel Taod2075ce2018-04-25 15:26:29 +100048//
49// On ../test/data/harvesters.png (1165 × 859 pixels):
50// name time/op relative
Nigel Taoc6a48b32019-01-19 10:50:40 +110051// FragDstFragIDAT/gcc 15.6ms ± 2% 1.00x
52// FragDstFullIDAT/gcc 15.4ms ± 0% 1.01x
53// FullDstFragIDAT/gcc 14.4ms ± 0% 1.08x
54// FullDstFullIDAT/gcc 14.1ms ± 0% 1.10x
Nigel Taod2075ce2018-04-25 15:26:29 +100055
56#include <errno.h>
57#include <inttypes.h>
58#include <stdio.h>
59#include <string.h>
60#include <sys/time.h>
61#include <unistd.h>
62
Nigel Taoa0bafff2018-07-14 08:59:36 +100063// Wuffs ships as a "single file C library" or "header file library" as per
64// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
65//
66// To use that single file as a "foo.c"-like implementation, instead of a
67// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
68// compiling it.
69#define WUFFS_IMPLEMENTATION
70
Jimmy Casey3c50ada2018-07-26 17:12:19 +000071// If building this program in an environment that doesn't easily accommodate
Nigel Taod2075ce2018-04-25 15:26:29 +100072// relative includes, you can use the script/inline-c-relative-includes.go
73// program to generate a stand-alone C file.
Nigel Tao95d02ee2019-01-12 12:34:40 +110074#include "../release/c/wuffs-unsupported-snapshot.c"
Nigel Taod2075ce2018-04-25 15:26:29 +100075
76// The order matters here. Clang also defines "__GNUC__".
77#if defined(__clang__)
Nigel Taoc0bd9df2020-03-26 21:55:44 +110078const char* g_cc = "clang";
79const char* g_cc_version = __clang_version__;
Nigel Taod2075ce2018-04-25 15:26:29 +100080#elif defined(__GNUC__)
Nigel Taoc0bd9df2020-03-26 21:55:44 +110081const char* g_cc = "gcc";
82const char* g_cc_version = __VERSION__;
Nigel Taod2075ce2018-04-25 15:26:29 +100083#elif defined(_MSC_VER)
Nigel Taoc0bd9df2020-03-26 21:55:44 +110084const char* g_cc = "cl";
85const char* g_cc_version = "???";
Nigel Taod2075ce2018-04-25 15:26:29 +100086#else
Nigel Taoc0bd9df2020-03-26 21:55:44 +110087const char* g_cc = "cc";
88const char* g_cc_version = "???";
Nigel Taod2075ce2018-04-25 15:26:29 +100089#endif
90
Nigel Tao2914bae2020-02-26 09:40:30 +110091static inline uint32_t //
92load_u32be(uint8_t* p) {
Nigel Taod2075ce2018-04-25 15:26:29 +100093 return ((uint32_t)(p[0]) << 24) | ((uint32_t)(p[1]) << 16) |
94 ((uint32_t)(p[2]) << 8) | ((uint32_t)(p[3]) << 0);
95}
96
97// Limit the input PNG image (and therefore its IDAT data) to (64 MiB - 1 byte)
98// compressed, in up to 1024 IDAT chunks, and 256 MiB and 16384 × 16384 pixels
99// uncompressed. This is a limitation of this program (which uses the Wuffs
100// standard library), not a limitation of Wuffs per se.
Nigel Taofdac24a2020-03-06 21:53:08 +1100101#define DST_BUFFER_ARRAY_SIZE (256 * 1024 * 1024)
102#define SRC_BUFFER_ARRAY_SIZE (64 * 1024 * 1024)
Nigel Taod2075ce2018-04-25 15:26:29 +1000103#define MAX_DIMENSION (16384)
104#define MAX_IDAT_CHUNKS (1024)
105
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100106uint8_t g_dst_buffer_array[DST_BUFFER_ARRAY_SIZE] = {0};
107size_t g_dst_len = 0;
108uint8_t g_src_buffer_array[SRC_BUFFER_ARRAY_SIZE] = {0};
109size_t g_src_len = 0;
110uint8_t g_idat_buffer_array[SRC_BUFFER_ARRAY_SIZE] = {0};
Nigel Taod2075ce2018-04-25 15:26:29 +1000111// The n'th IDAT chunk data (where n is a zero-based count) is in
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100112// g_idat_buffer_array[i:j], where i = g_idat_splits[n+0] and j =
113// g_idat_splits[n+1].
114size_t g_idat_splits[MAX_IDAT_CHUNKS + 1] = {0};
115uint32_t g_num_idat_chunks = 0;
Nigel Taod2075ce2018-04-25 15:26:29 +1000116
Nigel Taofdac24a2020-03-06 21:53:08 +1100117#define WORK_BUFFER_ARRAY_SIZE \
118 WUFFS_ZLIB__DECODER_WORKBUF_LEN_MAX_INCL_WORST_CASE
119#if WORK_BUFFER_ARRAY_SIZE > 0
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100120uint8_t g_work_buffer_array[WORK_BUFFER_ARRAY_SIZE];
Nigel Tao53760ce2019-02-16 14:18:25 +1100121#else
122// Not all C/C++ compilers support 0-length arrays.
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100123uint8_t g_work_buffer_array[1];
Nigel Tao53760ce2019-02-16 14:18:25 +1100124#endif
125
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100126uint32_t g_width = 0;
127uint32_t g_height = 0;
128uint64_t g_bytes_per_pixel = 0;
129uint64_t g_bytes_per_row = 0;
130uint64_t g_bytes_per_frame = 0;
Nigel Taod2075ce2018-04-25 15:26:29 +1000131
Nigel Tao2914bae2020-02-26 09:40:30 +1100132const char* //
133read_stdin() {
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100134 while (g_src_len < SRC_BUFFER_ARRAY_SIZE) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000135 const int stdin_fd = 0;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100136 ssize_t n = read(stdin_fd, g_src_buffer_array + g_src_len,
137 SRC_BUFFER_ARRAY_SIZE - g_src_len);
Nigel Taod2075ce2018-04-25 15:26:29 +1000138 if (n > 0) {
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100139 g_src_len += n;
Nigel Taod2075ce2018-04-25 15:26:29 +1000140 } else if (n == 0) {
141 return NULL;
142 } else if (errno == EINTR) {
143 // No-op.
144 } else {
145 return strerror(errno);
146 }
147 }
148 return "input is too large";
149}
150
Nigel Tao2914bae2020-02-26 09:40:30 +1100151const char* //
152process_png_chunks(uint8_t* p, size_t n) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000153 while (n > 0) {
154 // Process the 8 byte chunk header.
155 if (n < 8) {
156 return "invalid PNG chunk";
157 }
158 uint32_t chunk_len = load_u32be(p + 0);
159 uint32_t chunk_type = load_u32be(p + 4);
160 p += 8;
161 n -= 8;
162
163 // Process the chunk payload.
164 if (n < chunk_len) {
165 return "short PNG chunk data";
166 }
167 switch (chunk_type) {
168 case 0x49484452: // "IHDR"
169 if (chunk_len != 13) {
170 return "invalid PNG IDAT chunk";
171 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100172 g_width = load_u32be(p + 0);
173 g_height = load_u32be(p + 4);
174 if ((g_width == 0) || (g_height == 0)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000175 return "image dimensions are too small";
176 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100177 if ((g_width > MAX_DIMENSION) || (g_height > MAX_DIMENSION)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000178 return "image dimensions are too large";
179 }
180 if (p[8] != 8) {
181 return "unsupported PNG bit depth";
182 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100183 if (g_bytes_per_pixel != 0) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000184 return "duplicate PNG IHDR chunk";
185 }
186 // Process the color type, as per the PNG spec table 11.1.
187 switch (p[9]) {
188 case 0:
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100189 g_bytes_per_pixel = 1;
Nigel Taod2075ce2018-04-25 15:26:29 +1000190 break;
191 case 2:
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100192 g_bytes_per_pixel = 3;
Nigel Taod2075ce2018-04-25 15:26:29 +1000193 break;
194 case 3:
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100195 g_bytes_per_pixel = 1;
Nigel Taod2075ce2018-04-25 15:26:29 +1000196 break;
197 case 4:
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100198 g_bytes_per_pixel = 2;
Nigel Taod2075ce2018-04-25 15:26:29 +1000199 break;
200 case 6:
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100201 g_bytes_per_pixel = 4;
Nigel Taod2075ce2018-04-25 15:26:29 +1000202 break;
203 default:
204 return "unsupported PNG color type";
205 }
206 if (p[12] != 0) {
207 return "unsupported PNG interlacing";
208 }
209 break;
210
211 case 0x49444154: // "IDAT"
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100212 if (g_num_idat_chunks == MAX_IDAT_CHUNKS - 1) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000213 return "too many IDAT chunks";
214 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100215 memcpy(g_idat_buffer_array + g_idat_splits[g_num_idat_chunks], p,
216 chunk_len);
217 g_idat_splits[g_num_idat_chunks + 1] =
218 g_idat_splits[g_num_idat_chunks] + chunk_len;
219 g_num_idat_chunks++;
Nigel Taod2075ce2018-04-25 15:26:29 +1000220 break;
221 }
222 p += chunk_len;
223 n -= chunk_len;
224
225 // Process (and ignore) the 4 byte chunk footer (a checksum).
226 if (n < 4) {
227 return "invalid PNG chunk";
228 }
229 p += 4;
230 n -= 4;
231 }
232 return NULL;
233}
234
Nigel Tao2914bae2020-02-26 09:40:30 +1100235const char* //
236decode_once(bool frag_dst, bool frag_idat) {
Nigel Tao53760ce2019-02-16 14:18:25 +1100237 wuffs_zlib__decoder dec;
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100238 wuffs_base__status status =
Nigel Tao53760ce2019-02-16 14:18:25 +1100239 wuffs_zlib__decoder__initialize(&dec, sizeof dec, WUFFS_VERSION, 0);
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100240 if (!wuffs_base__status__is_ok(&status)) {
241 return wuffs_base__status__message(&status);
Nigel Tao95d02ee2019-01-12 12:34:40 +1100242 }
Nigel Taod2075ce2018-04-25 15:26:29 +1000243
Nigel Tao95d02ee2019-01-12 12:34:40 +1100244 wuffs_base__io_buffer dst = ((wuffs_base__io_buffer){
245 .data = ((wuffs_base__slice_u8){
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100246 .ptr = g_dst_buffer_array,
247 .len = g_bytes_per_frame,
Nigel Tao95d02ee2019-01-12 12:34:40 +1100248 }),
249 });
250 wuffs_base__io_buffer idat = ((wuffs_base__io_buffer){
251 .data = ((wuffs_base__slice_u8){
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100252 .ptr = g_idat_buffer_array,
Nigel Taofdac24a2020-03-06 21:53:08 +1100253 .len = SRC_BUFFER_ARRAY_SIZE,
Nigel Tao95d02ee2019-01-12 12:34:40 +1100254 }),
255 .meta = ((wuffs_base__io_buffer_meta){
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100256 .wi = g_idat_splits[g_num_idat_chunks],
Nigel Tao95d02ee2019-01-12 12:34:40 +1100257 .ri = 0,
258 .pos = 0,
259 .closed = true,
260 }),
261 });
Nigel Taod2075ce2018-04-25 15:26:29 +1000262
263 uint32_t i = 0; // Number of dst fragments processed, if frag_dst.
264 if (frag_dst) {
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100265 dst.data.len = g_bytes_per_row;
Nigel Taod2075ce2018-04-25 15:26:29 +1000266 }
267
268 uint32_t j = 0; // Number of IDAT fragments processed, if frag_idat.
269 if (frag_idat) {
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100270 idat.meta.wi = g_idat_splits[1];
271 idat.meta.closed = (g_num_idat_chunks == 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000272 }
273
274 while (true) {
Nigel Taofdac24a2020-03-06 21:53:08 +1100275 status =
276 wuffs_zlib__decoder__transform_io(&dec, &dst, &idat,
277 ((wuffs_base__slice_u8){
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100278 .ptr = g_work_buffer_array,
Nigel Taofdac24a2020-03-06 21:53:08 +1100279 .len = WORK_BUFFER_ARRAY_SIZE,
280 }));
Nigel Taod2075ce2018-04-25 15:26:29 +1000281
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100282 if (wuffs_base__status__is_ok(&status)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000283 break;
284 }
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100285 if ((status.repr == wuffs_base__suspension__short_write) && frag_dst &&
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100286 (i < g_height - 1)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000287 i++;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100288 dst.data.len = g_bytes_per_row * (i + 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000289 continue;
290 }
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100291 if ((status.repr == wuffs_base__suspension__short_read) && frag_idat &&
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100292 (j < g_num_idat_chunks - 1)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000293 j++;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100294 idat.meta.wi = g_idat_splits[j + 1];
295 idat.meta.closed = (g_num_idat_chunks == j + 1);
Nigel Taod2075ce2018-04-25 15:26:29 +1000296 continue;
297 }
Nigel Tao5f8d2da2020-01-10 22:06:50 +1100298 return wuffs_base__status__message(&status);
Nigel Taod2075ce2018-04-25 15:26:29 +1000299 }
300
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100301 if (dst.meta.wi != g_bytes_per_frame) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000302 return "unexpected number of bytes decoded";
303 }
304 return NULL;
305}
306
Nigel Tao2914bae2020-02-26 09:40:30 +1100307const char* //
308decode(bool frag_dst, bool frag_idat) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000309 int reps;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100310 if (g_bytes_per_frame < 100000) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000311 reps = 1000;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100312 } else if (g_bytes_per_frame < 1000000) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000313 reps = 100;
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100314 } else if (g_bytes_per_frame < 10000000) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000315 reps = 10;
316 } else {
317 reps = 1;
318 }
319
320 struct timeval bench_start_tv;
321 gettimeofday(&bench_start_tv, NULL);
322
323 int i;
324 for (i = 0; i < reps; i++) {
325 const char* msg = decode_once(frag_dst, frag_idat);
326 if (msg) {
327 return msg;
328 }
329 }
330
331 struct timeval bench_finish_tv;
332 gettimeofday(&bench_finish_tv, NULL);
333 int64_t micros =
334 (int64_t)(bench_finish_tv.tv_sec - bench_start_tv.tv_sec) * 1000000 +
335 (int64_t)(bench_finish_tv.tv_usec - bench_start_tv.tv_usec);
336 uint64_t nanos = 1;
337 if (micros > 0) {
338 nanos = (uint64_t)(micros)*1000;
339 }
340
341 printf("Benchmark%sDst%sIDAT/%s\t%8d\t%8" PRIu64 " ns/op\n",
342 frag_dst ? "Frag" : "Full", //
343 frag_idat ? "Frag" : "Full", //
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100344 g_cc, reps, nanos / reps);
Nigel Taod2075ce2018-04-25 15:26:29 +1000345
346 return NULL;
347}
348
Nigel Tao2914bae2020-02-26 09:40:30 +1100349int //
350fail(const char* msg) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000351 const int stderr_fd = 2;
352 write(stderr_fd, msg, strnlen(msg, 4095));
353 write(stderr_fd, "\n", 1);
354 return 1;
355}
356
Nigel Tao2914bae2020-02-26 09:40:30 +1100357int //
358main(int argc, char** argv) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000359 const char* msg = read_stdin();
360 if (msg) {
361 return fail(msg);
362 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100363 if ((g_src_len < 8) || strncmp((const char*)(g_src_buffer_array),
364 "\x89PNG\x0D\x0A\x1A\x0A", 8)) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000365 return fail("invalid PNG");
366 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100367 msg = process_png_chunks(g_src_buffer_array + 8, g_src_len - 8);
Nigel Taod2075ce2018-04-25 15:26:29 +1000368 if (msg) {
369 return fail(msg);
370 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100371 if (g_bytes_per_pixel == 0) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000372 return fail("missing PNG IHDR chunk");
373 }
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100374 if (g_num_idat_chunks == 0) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000375 return fail("missing PNG IDAT chunk");
376 }
377 // The +1 here is for the per-row filter byte.
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100378 g_bytes_per_row = (uint64_t)g_width * g_bytes_per_pixel + 1;
379 g_bytes_per_frame = (uint64_t)g_height * g_bytes_per_row;
380 if (g_bytes_per_frame > DST_BUFFER_ARRAY_SIZE) {
Nigel Taod2075ce2018-04-25 15:26:29 +1000381 return fail("decompressed data is too large");
382 }
383
Nigel Taoc0bd9df2020-03-26 21:55:44 +1100384 printf("# %s version %s\n#\n", g_cc, g_cc_version);
Nigel Taod2075ce2018-04-25 15:26:29 +1000385 printf(
386 "# The output format, including the \"Benchmark\" prefixes, is "
387 "compatible with the\n"
388 "# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install "
389 "it, first\n"
390 "# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n");
391
392 int i;
393 for (i = 0; i < 5; i++) {
394 msg = decode(true, true);
395 if (msg) {
396 return fail(msg);
397 }
398 msg = decode(true, false);
399 if (msg) {
400 return fail(msg);
401 }
402 msg = decode(false, true);
403 if (msg) {
404 return fail(msg);
405 }
406 msg = decode(false, false);
407 if (msg) {
408 return fail(msg);
409 }
410 }
411
412 return 0;
413}