Implement ASTC support
This cl adds both LDR and HDR support for ASTC compressed
textures. Only LDR formats are exposed in the
PhysicalDevice's properties, but HDR support can be added
trivially later by adding the HDR formats and exposing
"VK_EXT_texture_compression_astc_hdr".
Pulled from: https://github.com/ARM-software/astc-encoder
Git hash: 81a5e50741b4c8302cf7d78f314a53e44ee68e1f
The astc-encode git repo was added to third-party, with a
few minor modifications:
1) All encoding related code has been ripped out, only
decoding related code remains
2) Replaced ASTC_CODEC_INTERNAL_ERROR() with UNREACHABLE()
in a switch statement in astc_color_unquantize.cpp
3) Some functions were using a lot of stack memory, so I
added a unique_ptr to allocate the same objects on the
heap, to avoid potential issues.
LDR ASTC is decoded to 8bit unsigned RGBA.
HDR ASTC is decoded to 32b floating point.
Tests: dEQP-VK.*astc*
Bug: b/150130101
Change-Id: I6b03fed6e1f326a95c7aefe9f9a9d0a89cf24428
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/41568
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
Kokoro-Presubmit: kokoro <noreply+kokoro@google.com>
diff --git a/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp b/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp
new file mode 100644
index 0000000..975db2c
--- /dev/null
+++ b/third_party/astc-encoder/Source/astc_mathlib_softfloat.cpp
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: Apache-2.0
+// ----------------------------------------------------------------------------
+// Copyright 2011-2020 Arm Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+// ----------------------------------------------------------------------------
+
+/**
+ * @brief Soft-float library for IEEE-754.
+ */
+
+#include "astc_mathlib.h"
+
+/******************************************
+ helper functions and their lookup tables
+ ******************************************/
+/* count leading zeros functions. Only used when the input is nonzero. */
+
+#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
+#elif defined(__arm__) && defined(__ARMCC_VERSION)
+#elif defined(__arm__) && defined(__GNUC__)
+#else
+ /* table used for the slow default versions. */
+ static const uint8_t clz_table[256] =
+ {
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+#endif
+
+/*
+ 32-bit count-leading-zeros function: use the Assembly instruction whenever possible. */
+uint32_t clz32(uint32_t inp)
+{
+ #if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
+ uint32_t bsr;
+ __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1));
+ return 31 - bsr;
+ #else
+ #if defined(__arm__) && defined(__ARMCC_VERSION)
+ return __clz(inp); /* armcc builtin */
+ #else
+ #if defined(__arm__) && defined(__GNUC__)
+ uint32_t lz;
+ __asm__("clz %0, %1": "=r"(lz):"r"(inp));
+ return lz;
+ #else
+ /* slow default version */
+ uint32_t summa = 24;
+ if (inp >= UINT32_C(0x10000))
+ {
+ inp >>= 16;
+ summa -= 16;
+ }
+ if (inp >= UINT32_C(0x100))
+ {
+ inp >>= 8;
+ summa -= 8;
+ }
+ return summa + clz_table[inp];
+ #endif
+ #endif
+ #endif
+}
+
+/* convert from FP16 to FP32. */
+sf32 sf16_to_sf32(sf16 inp)
+{
+ uint32_t inpx = inp;
+
+ /*
+ This table contains, for every FP16 sign/exponent value combination,
+ the difference between the input FP16 value and the value obtained
+ by shifting the correct FP32 result right by 13 bits.
+ This table allows us to handle every case except denormals and NaN
+ with just 1 table lookup, 2 shifts and 1 add.
+ */
+
+ #define WITH_MB(a) INT32_C((a) | (1 << 31))
+ static const int32_t tbl[64] =
+ {
+ WITH_MB(0x00000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+ INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+ INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000),
+ INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), INT32_C(0x1C000), WITH_MB(0x38000),
+ WITH_MB(0x38000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+ INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+ INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000),
+ INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), INT32_C(0x54000), WITH_MB(0x70000)
+ };
+
+ int32_t res = tbl[inpx >> 10];
+ res += inpx;
+
+ /* the normal cases: the MSB of 'res' is not set. */
+ if (res >= 0) /* signed compare */
+ return res << 13;
+
+ /* Infinity and Zero: the bottom 10 bits of 'res' are clear. */
+ if ((res & UINT32_C(0x3FF)) == 0)
+ return res << 13;
+
+ /* NaN: the exponent field of 'inp' is not zero; NaNs must be quietened. */
+ if ((inpx & 0x7C00) != 0)
+ return (res << 13) | UINT32_C(0x400000);
+
+ /* the remaining cases are Denormals. */
+ {
+ uint32_t sign = (inpx & UINT32_C(0x8000)) << 16;
+ uint32_t mskval = inpx & UINT32_C(0x7FFF);
+ uint32_t leadingzeroes = clz32(mskval);
+ mskval <<= leadingzeroes;
+ return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign;
+ }
+}
+
+/* convert from soft-float to native-float */
+float sf16_to_float(sf16 p)
+{
+ if32 i;
+ i.u = sf16_to_sf32(p);
+ return i.f;
+}
+