Blame - script/bench-c-deflate-fragmentation.c - skia.googlesource.com/external/github.com/google/wuffs

blob: e33322a18f1e6096bc8bf8bc75fbcca66535b4a6 [file] [log] [blame]

Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	1	// Copyright 2018 The Wuffs Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	// This file contains a hand-written C benchmark of different strategies for
				16	// decoding PNG data.
				17	//
				18	// For a PNG image with width W and height H, the H rows can be decompressed
				19	// one-at-a-time or all-at-once. Roughly speaking, this corresponds to H versus
				20	// 1 call into the zlib decoder. The former (call it "fragmented dst") requires
				21	// less scratch-space memory than the latter ("full dst"): 2 * bytes_per_row
				22	// instead of H * bytes_per_row, but the latter can be faster.
				23	//
				24	// The zlib-compressed data can be split into multiple IDAT chunks. Similarly,
				25	// these chunks can be decompressed separately ("fragmented IDAT") or together
				26	// ("full IDAT"), again providing a memory vs speed trade-off.
				27	//
				28	// This program reports the speed of combining the independent frag/full dst
				29	// and frag/full IDAT techniques.
				30	//
				31	// For example, with gcc 7.3 (and -O3) as of April 2018:
				32	//
				33	// On ../test/data/hat.png (90 × 112 pixels):
				34	// name time/op relative
				35	// FragDstFragIDAT/gcc 203µs ± 1% 1.00x
				36	// FragDstFullIDAT/gcc 203µs ± 0% 1.00x
				37	// FullDstFragIDAT/gcc 170µs ± 0% 1.19x
				38	// FullDstFullIDAT/gcc 147µs ± 0% 1.38x
				39	//
				40	// On ../test/data/hibiscus.png (312 × 442 pixels):
				41	// name time/op relative
				42	// FragDstFragIDAT/gcc 2.62ms ± 1% 1.00x
				43	// FragDstFullIDAT/gcc 2.61ms ± 1% 1.00x
				44	// FullDstFragIDAT/gcc 2.44ms ± 0% 1.07x
				45	// FullDstFullIDAT/gcc 2.03ms ± 1% 1.29x
				46	//
				47	// On ../test/data/harvesters.png (1165 × 859 pixels):
				48	// name time/op relative
				49	// FragDstFragIDAT/gcc 18.3ms ± 0% 1.00x
				50	// FragDstFullIDAT/gcc 18.3ms ± 1% 1.00x
				51	// FullDstFragIDAT/gcc 17.1ms ± 0% 1.07x
				52	// FullDstFullIDAT/gcc 13.9ms ± 0% 1.32x
				53
				54	#include <errno.h>
				55	#include <inttypes.h>
				56	#include <stdio.h>
				57	#include <string.h>
				58	#include <sys/time.h>
				59	#include <unistd.h>
				60
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	61	// Wuffs ships as a "single file C library" or "header file library" as per
				62	// https://github.com/nothings/stb/blob/master/docs/stb_howto.txt
				63	//
				64	// To use that single file as a "foo.c"-like implementation, instead of a
				65	// "foo.h"-like header, #define WUFFS_IMPLEMENTATION before #include'ing or
				66	// compiling it.
				67	#define WUFFS_IMPLEMENTATION
				68
Jimmy Casey	3c50ada	2018-07-26 17:12:19 +0000	[diff] [blame]	69	// If building this program in an environment that doesn't easily accommodate
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	70	// relative includes, you can use the script/inline-c-relative-includes.go
				71	// program to generate a stand-alone C file.
Nigel Tao	bc82e51	2018-07-21 09:20:10 +1000	[diff] [blame]	72	#include "../release/c/wuffs-unsupported-snapshot.h"
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	73
				74	// The order matters here. Clang also defines "__GNUC__", so "__clang__" must be tested first.
				75	#if defined(__clang__)
				76	const char* cc = "clang"; // Compiler name, printed in each benchmark line.
				77	const char* cc_version = __clang_version__; // Printed once, in the output header.
				78	#elif defined(__GNUC__)
				79	const char* cc = "gcc";
				80	const char* cc_version = __VERSION__;
				81	#elif defined(_MSC_VER)
				82	const char* cc = "cl";
				83	const char* cc_version = "???"; // No version string is provided for this compiler.
				84	#else
				85	const char* cc = "cc";
				86	const char* cc_version = "???"; // Unknown compiler: no version string available.
				87	#endif
				88
				// load_u32be returns the 32-bit unsigned integer stored, in big-endian
				// (most significant byte first) order, in the 4 bytes starting at p.
				//
				// Each byte is widened to uint32_t before shifting so that the shifts are
				// performed on an unsigned 32-bit value.
				static inline uint32_t load_u32be(const uint8_t* p) {
				  return ((uint32_t)(p[0]) << 24) | ((uint32_t)(p[1]) << 16) |
				         ((uint32_t)(p[2]) << 8) | ((uint32_t)(p[3]) << 0);
				}
				93
				94	// Limit the input PNG image (and therefore its IDAT data) to (64 MiB - 1 byte)
				95	// compressed, in up to 1024 IDAT chunks, and 256 MiB and 16384 × 16384 pixels
				96	// uncompressed. This is a limitation of this program (which uses the Wuffs
				97	// standard library), not a limitation of Wuffs per se.
				98	#define DST_BUFFER_SIZE (256 * 1024 * 1024) // Capacity of dst_buffer, in bytes.
				99	#define SRC_BUFFER_SIZE (64 * 1024 * 1024) // Capacity of src_buffer and of idat_buffer, in bytes.
				100	#define MAX_DIMENSION (16384) // Largest accepted image width or height, in pixels.
				101	#define MAX_IDAT_CHUNKS (1024) // Sizes idat_splits, which has one extra entry.
				102
				103	uint8_t dst_buffer[DST_BUFFER_SIZE] = {0}; // Destination for the decompressed data.
				104	size_t dst_len = 0; // Not referenced by the functions in this file.
				105	uint8_t src_buffer[SRC_BUFFER_SIZE] = {0}; // The raw PNG file, as read from stdin.
				106	size_t src_len = 0; // Number of valid bytes in src_buffer.
				107	uint8_t idat_buffer[SRC_BUFFER_SIZE] = {0}; // The IDAT chunk payloads, concatenated.
				108	// The n'th IDAT chunk data (where n is a zero-based count) is in
				109	// idat_buffer[i:j], where i = idat_splits[n+0] and j = idat_splits[n+1].
				110	size_t idat_splits[MAX_IDAT_CHUNKS + 1] = {0};
				111	uint32_t num_idat_chunks = 0; // Number of IDAT chunks copied into idat_buffer.
				112
				113	uint32_t width = 0; // From the IHDR chunk, in pixels.
				114	uint32_t height = 0; // From the IHDR chunk, in pixels.
				115	uint64_t bytes_per_pixel = 0; // Set from the IHDR color type; 0 means no IHDR seen yet.
				116	uint64_t bytes_per_row = 0; // width * bytes_per_pixel, plus 1 for the per-row filter byte.
				117	uint64_t bytes_per_frame = 0; // height * bytes_per_row.
				118
				119	const char* read_stdin() {
				120	while (src_len < SRC_BUFFER_SIZE) {
				121	const int stdin_fd = 0;
				122	ssize_t n = read(stdin_fd, src_buffer + src_len, SRC_BUFFER_SIZE - src_len);
				123	if (n > 0) {
				124	src_len += n;
				125	} else if (n == 0) {
				126	return NULL;
				127	} else if (errno == EINTR) {
				128	// No-op.
				129	} else {
				130	return strerror(errno);
				131	}
				132	}
				133	return "input is too large";
				134	}
				135
				136	const char* process_png_chunks(uint8_t* p, size_t n) {
				137	while (n > 0) {
				138	// Process the 8 byte chunk header.
				139	if (n < 8) {
				140	return "invalid PNG chunk";
				141	}
				142	uint32_t chunk_len = load_u32be(p + 0);
				143	uint32_t chunk_type = load_u32be(p + 4);
				144	p += 8;
				145	n -= 8;
				146
				147	// Process the chunk payload.
				148	if (n < chunk_len) {
				149	return "short PNG chunk data";
				150	}
				151	switch (chunk_type) {
				152	case 0x49484452: // "IHDR"
				153	if (chunk_len != 13) {
				154	return "invalid PNG IDAT chunk";
				155	}
				156	width = load_u32be(p + 0);
				157	height = load_u32be(p + 4);
				158	if ((width == 0) \|\| (height == 0)) {
				159	return "image dimensions are too small";
				160	}
				161	if ((width > MAX_DIMENSION) \|\| (height > MAX_DIMENSION)) {
				162	return "image dimensions are too large";
				163	}
				164	if (p[8] != 8) {
				165	return "unsupported PNG bit depth";
				166	}
				167	if (bytes_per_pixel != 0) {
				168	return "duplicate PNG IHDR chunk";
				169	}
				170	// Process the color type, as per the PNG spec table 11.1.
				171	switch (p[9]) {
				172	case 0:
				173	bytes_per_pixel = 1;
				174	break;
				175	case 2:
				176	bytes_per_pixel = 3;
				177	break;
				178	case 3:
				179	bytes_per_pixel = 1;
				180	break;
				181	case 4:
				182	bytes_per_pixel = 2;
				183	break;
				184	case 6:
				185	bytes_per_pixel = 4;
				186	break;
				187	default:
				188	return "unsupported PNG color type";
				189	}
				190	if (p[12] != 0) {
				191	return "unsupported PNG interlacing";
				192	}
				193	break;
				194
				195	case 0x49444154: // "IDAT"
				196	if (num_idat_chunks == MAX_IDAT_CHUNKS - 1) {
				197	return "too many IDAT chunks";
				198	}
				199	memcpy(idat_buffer + idat_splits[num_idat_chunks], p, chunk_len);
				200	idat_splits[num_idat_chunks + 1] =
				201	idat_splits[num_idat_chunks] + chunk_len;
				202	num_idat_chunks++;
				203	break;
				204	}
				205	p += chunk_len;
				206	n -= chunk_len;
				207
				208	// Process (and ignore) the 4 byte chunk footer (a checksum).
				209	if (n < 4) {
				210	return "invalid PNG chunk";
				211	}
				212	p += 4;
				213	n -= 4;
				214	}
				215	return NULL;
				216	}
				217
				218	const char* decode_once(bool frag_dst, bool frag_idat) {
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	219	wuffs_zlib__decoder dec = ((wuffs_zlib__decoder){});
				220	wuffs_zlib__decoder__check_wuffs_version(&dec, sizeof dec, WUFFS_VERSION);
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	221
				222	wuffs_base__io_buffer dst = {.ptr = dst_buffer, .len = bytes_per_frame};
				223	wuffs_base__io_buffer idat = {.ptr = idat_buffer,
				224	.len = SRC_BUFFER_SIZE,
				225	.wi = idat_splits[num_idat_chunks],
				226	.closed = true};
Nigel Tao	8827e22	2018-04-27 20:54:44 +1000	[diff] [blame]	227	wuffs_base__io_writer dst_writer = wuffs_base__io_buffer__writer(&dst);
				228	wuffs_base__io_reader idat_reader = wuffs_base__io_buffer__reader(&idat);
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	229
				230	uint32_t i = 0; // Number of dst fragments processed, if frag_dst.
				231	if (frag_dst) {
				232	dst.len = bytes_per_row;
				233	}
				234
				235	uint32_t j = 0; // Number of IDAT fragments processed, if frag_idat.
				236	if (frag_idat) {
				237	idat.wi = idat_splits[1];
				238	idat.closed = (num_idat_chunks == 1);
				239	}
				240
				241	while (true) {
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	242	wuffs_base__status s =
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	243	wuffs_zlib__decoder__decode(&dec, dst_writer, idat_reader);
				244
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	245	if (s == WUFFS_BASE__STATUS_OK) {
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	246	break;
				247	}
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	248	if ((s == WUFFS_BASE__SUSPENSION_SHORT_WRITE) && frag_dst &&
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	249	(i < height - 1)) {
				250	i++;
				251	dst.len = bytes_per_row * (i + 1);
				252	continue;
				253	}
Nigel Tao	a0bafff	2018-07-14 08:59:36 +1000	[diff] [blame]	254	if ((s == WUFFS_BASE__SUSPENSION_SHORT_READ) && frag_idat &&
Nigel Tao	d2075ce	2018-04-25 15:26:29 +1000	[diff] [blame]	255	(j < num_idat_chunks - 1)) {
				256	j++;
				257	idat.wi = idat_splits[j + 1];
				258	idat.closed = (num_idat_chunks == j + 1);
				259	continue;
				260	}
				261	return wuffs_zlib__status__string(s);
				262	}
				263
				264	if (dst.wi != bytes_per_frame) {
				265	return "unexpected number of bytes decoded";
				266	}
				267	return NULL;
				268	}
				269
				270	const char* decode(bool frag_dst, bool frag_idat) {
				271	int reps;
				272	if (bytes_per_frame < 100000) {
				273	reps = 1000;
				274	} else if (bytes_per_frame < 1000000) {
				275	reps = 100;
				276	} else if (bytes_per_frame < 10000000) {
				277	reps = 10;
				278	} else {
				279	reps = 1;
				280	}
				281
				282	struct timeval bench_start_tv;
				283	gettimeofday(&bench_start_tv, NULL);
				284
				285	int i;
				286	for (i = 0; i < reps; i++) {
				287	const char* msg = decode_once(frag_dst, frag_idat);
				288	if (msg) {
				289	return msg;
				290	}
				291	}
				292
				293	struct timeval bench_finish_tv;
				294	gettimeofday(&bench_finish_tv, NULL);
				295	int64_t micros =
				296	(int64_t)(bench_finish_tv.tv_sec - bench_start_tv.tv_sec) * 1000000 +
				297	(int64_t)(bench_finish_tv.tv_usec - bench_start_tv.tv_usec);
				298	uint64_t nanos = 1;
				299	if (micros > 0) {
				300	nanos = (uint64_t)(micros)*1000;
				301	}
				302
				303	printf("Benchmark%sDst%sIDAT/%s\t%8d\t%8" PRIu64 " ns/op\n",
				304	frag_dst ? "Frag" : "Full", //
				305	frag_idat ? "Frag" : "Full", //
				306	cc, reps, nanos / reps);
				307
				308	return NULL;
				309	}
				310
				// fail_write_all writes the n bytes at p to the file descriptor fd, retrying
				// after partial writes and after interruption by a signal. Any other error
				// is silently dropped: this is only used for best-effort error reporting.
				static void fail_write_all(int fd, const char* p, size_t n) {
				  while (n > 0) {
				    ssize_t w = write(fd, p, n);
				    if (w > 0) {
				      p += w;
				      n -= (size_t)w;
				    } else if ((w < 0) && (errno == EINTR)) {
				      continue;
				    } else {
				      return;
				    }
				  }
				}

				// fail writes msg, truncated to at most 4095 bytes, and then a newline to
				// standard error. It always returns 1, so that main can "return fail(msg);"
				// to exit with a failure status.
				int fail(const char* msg) {
				  const int stderr_fd = 2;
				  // Bounded string length: stop at the first NUL byte or after 4095 bytes,
				  // whichever comes first.
				  const char* nul = memchr(msg, '\0', 4095);
				  size_t len = nul ? (size_t)(nul - msg) : 4095;
				  fail_write_all(stderr_fd, msg, len);
				  fail_write_all(stderr_fd, "\n", 1);
				  return 1;
				}
				317
				318	int main(int argc, char** argv) {
				319	const char* msg = read_stdin();
				320	if (msg) {
				321	return fail(msg);
				322	}
				323	if ((src_len < 8) \|\| strncmp(src_buffer, "\x89PNG\x0D\x0A\x1A\x0A", 8)) {
				324	return fail("invalid PNG");
				325	}
				326	msg = process_png_chunks(src_buffer + 8, src_len - 8);
				327	if (msg) {
				328	return fail(msg);
				329	}
				330	if (bytes_per_pixel == 0) {
				331	return fail("missing PNG IHDR chunk");
				332	}
				333	if (num_idat_chunks == 0) {
				334	return fail("missing PNG IDAT chunk");
				335	}
				336	// The +1 here is for the per-row filter byte.
				337	bytes_per_row = (uint64_t)width * bytes_per_pixel + 1;
				338	bytes_per_frame = (uint64_t)height * bytes_per_row;
				339	if (bytes_per_frame > DST_BUFFER_SIZE) {
				340	return fail("decompressed data is too large");
				341	}
				342
				343	printf("# %s version %s\n#\n", cc, cc_version);
				344	printf(
				345	"# The output format, including the \"Benchmark\" prefixes, is "
				346	"compatible with the\n"
				347	"# https://godoc.org/golang.org/x/perf/cmd/benchstat tool. To install "
				348	"it, first\n"
				349	"# install Go, then run \"go get golang.org/x/perf/cmd/benchstat\".\n");
				350
				351	int i;
				352	for (i = 0; i < 5; i++) {
				353	msg = decode(true, true);
				354	if (msg) {
				355	return fail(msg);
				356	}
				357	msg = decode(true, false);
				358	if (msg) {
				359	return fail(msg);
				360	}
				361	msg = decode(false, true);
				362	if (msg) {
				363	return fail(msg);
				364	}
				365	msg = decode(false, false);
				366	if (msg) {
				367	return fail(msg);
				368	}
				369	}
				370
				371	return 0;
				372	}