Implement ASTC support This cl adds both LDR and HDR support for ASTC compressed textures. Only LDR formats are exposed in the PhysicalDevice's properties, but HDR support can be added trivially later by adding the HDR formats and exposing "VK_EXT_texture_compression_astc_hdr". Pulled from: https://github.com/ARM-software/astc-encoder Git hash: 81a5e50741b4c8302cf7d78f314a53e44ee68e1f The astc-encode git repo was added to third-party, with a few minor modifications: 1) All encoding related code has been ripped out, only decoding related code remains 2) Replaced ASTC_CODEC_INTERNAL_ERROR() with UNREACHABLE() in a switch statement in astc_color_unquantize.cpp 3) Some functions were using a lot of stack memory, so I added a unique_ptr to allocate the same objects on the heap, to avoid potential issues. LDR ASTC is decoded to 8bit unsigned RGBA. HDR ASTC is decoded to 32b floating point. Tests: dEQP-VK.*astc* Bug: b/150130101 Change-Id: I6b03fed6e1f326a95c7aefe9f9a9d0a89cf24428 Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41568 Reviewed-by: Nicolas Capens <nicolascapens@google.com> Tested-by: Nicolas Capens <nicolascapens@google.com> Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>

commit: 1b90087896c5174b6bdeb5c5d34d7005718f87e8 [log] [tgz]
author: Alexis Hetu <sugoi@google.com> Mon Feb 24 12:09:16 2020 -0500
committer: Alexis Hétu <sugoi@google.com> Fri Feb 28 14:27:58 2020 +0000
tree: 7c60958b7d171e9371c2532ca6bf1aed4b01d389
parent: 3e5fe285a7ef1f5227871a1a574078860e00eb33 [diff] [blame]
diff --git a/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp b/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp
new file mode 100644
index 0000000..975db2c
--- /dev/null
+++ b/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp

@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: Apache-2.0
+// ----------------------------------------------------------------------------
+// Copyright 2011-2020 Arm Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at:
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+// ----------------------------------------------------------------------------
+
+/**
+ * @brief Soft-float library for IEEE-754.
+ */
+
+#include "astc_mathlib.h"
+
+/******************************************
+  helper functions and their lookup tables
+ ******************************************/
+/* count leading zeros functions. Only used when the input is nonzero. */
+
+#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
+#elif defined(__arm__) && defined(__ARMCC_VERSION)
+#elif defined(__arm__) && defined(__GNUC__)
+#else
+	/* table used for the slow default versions. */
+	static const uint8_t clz_table[256] =
+	{
+		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+	};
+#endif
+
+/*
+   32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
+uint32_t clz32(uint32_t inp)
+{
+	#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
+		uint32_t bsr;
+	__asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
+		return 31 - bsr;
+	#else
+		#if defined(__arm__) && defined(__ARMCC_VERSION)
+			return __clz(inp);			/* armcc builtin */
+		#else
+			#if defined(__arm__) && defined(__GNUC__)
+				uint32_t lz;
+			__asm__("clz %0, %1": "=r"(lz):"r"(inp));
+				return lz;
+			#else
+				/* slow default version */
+				uint32_t summa = 24;
+				if (inp >= UINT32_C(0x10000))
+				{
+					inp >>= 16;
+					summa -= 16;
+				}
+				if (inp >= UINT32_C(0x100))
+				{
+					inp >>= 8;
+					summa -= 8;
+				}
+				return summa + clz_table[inp];
+			#endif
+		#endif
+	#endif
+}
+
+/* convert from FP16 to FP32. */
+sf32 sf16_to_sf32(sf16 inp)
+{
+	uint32_t inpx = inp;
+
+	/*
+		This table contains, for every FP16 sign/exponent value combination,
+		the difference between the input FP16 value and the value obtained
+		by shifting the correct FP32 result right by 13 bits.
+		This table allows us to handle every case except denormals and NaN
+		with just 1 table lookup, 2 shifts and 1 add.
+	*/
+
+	#define WITH_MB(a) INT32_C((a) | (1 << 31))
+	static const int32_t tbl[64] =
+	{
+		WITH_MB(0x00000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+		INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+		INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+		INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), WITH_MB(0x38000),
+		WITH_MB(0x38000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+		INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+		INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+		INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), WITH_MB(0x70000)
+	};
+
+	int32_t res = tbl[inpx >> 10];
+	res += inpx;
+
+	/* the normal cases: the MSB of 'res' is not set. */
+	if (res >= 0)				/* signed compare */
+		return res << 13;
+
+	/* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
+	if ((res & UINT32_C(0x3FF)) == 0)
+		return res << 13;
+
+	/* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
+	if ((inpx & 0x7C00) != 0)
+		return (res << 13) | UINT32_C(0x400000);
+
+	/* the remaining cases are Denormals. */
+	{
+		uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
+		uint32_t mskval = inpx & UINT32_C(0x7FFF);
+		uint32_t leadingzeroes = clz32(mskval);
+		mskval <<= leadingzeroes;
+		return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
+	}
+}
+
+/* convert from soft-float to native-float */
+float sf16_to_float(sf16 p)
+{
+	if32 i;
+	i.u = sf16_to_sf32(p);
+	return i.f;
+}
+
commit	1b90087896c5174b6bdeb5c5d34d7005718f87e8	[log] [tgz]
author	Alexis Hetu <sugoi@google.com>	Mon Feb 24 12:09:16 2020 -0500
committer	Alexis Hétu <sugoi@google.com>	Fri Feb 28 14:27:58 2020 +0000
tree	7c60958b7d171e9371c2532ca6bf1aed4b01d389
parent	3e5fe285a7ef1f5227871a1a574078860e00eb33 [diff] [blame]