// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
| 17 | |
/*
 * This module implements a variety of mathematical data types and library
 * functions used by the codec.
 */
| 22 | |
| 23 | #ifndef ASTC_MATHLIB_H_INCLUDED |
| 24 | #define ASTC_MATHLIB_H_INCLUDED |
| 25 | |
| 26 | #include <cmath> |
| 27 | #include <cstdint> |
| 28 | |
// Provide M_PI only when nothing included above has already defined it.
#if !defined(M_PI)
    #define M_PI 3.14159265358979323846
#endif
| 32 | |
/* ============================================================================
  Fast math library; note that many of the higher-order functions in this set
  use approximations which are less accurate, but faster, than <cmath> standard
  library equivalents.

  Note: Many of these are not necessarily faster than simple C versions when
  used on a single scalar value, but are included for testing purposes as most
  have an option based on SSE intrinsics and therefore provide an obvious route
  to future vectorization.
============================================================================ */
| 43 | |
// Scalar versions of many maths functions can use SSE intrinsics as an
// "optimized" path, using just one lane from the SIMD hardware. In reality
// these are often slower than standard C due to setup and scheduling
// overheads, and the fact that the cost is not offset by any actual
// vectorization.
//
// These variants exist only as a means to test that the accuracy of an SSE
// implementation would be acceptable before refactoring code paths to use an
// actual vectorized implementation which gets some advantage from SSE. It is
// therefore expected that the code will go *slower* with this macro set
// to 1 ...
//
// The default is 0. The definition is guarded, like the other macros in this
// header, so that a build can pre-define USE_SCALAR_SSE (for example on the
// compiler command line) without causing a macro redefinition.
#ifndef USE_SCALAR_SSE
#define USE_SCALAR_SSE 0
#endif
| 56 | |
// Everything below lives in its own namespace so that the names cannot
// collide with the C standard library functions of the same name.
namespace astc
{

/**
 * @brief Check whether a float value is a NaN.
 *
 * @param val The value to test.
 *
 * @return Zero if @c val is not a NaN, non-zero otherwise.
 */
static inline int isnan(float val)
{
    // A NaN is the only value that does not compare equal to itself.
    return (val == val) ? 0 : 1;
}

/**
 * @brief Set up the seed state for a random number generator.
 *
 * Important note: ASTC wants sets of random numbers for use in the codec, but
 * the seed value must be the same across instances and threads so that image
 * output is stable across compressor runs and across platforms. Every PRNG
 * initialized through this call will therefore produce the same sequence of
 * values ...
 *
 * @param state The state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Fetch the next random number from the generator.
 *
 * The generator is an implementation of the "xoroshiro-128+ 1.0" PRNG, based
 * on the public-domain implementation given by David Blackman & Sebastiano
 * Vigna at http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update.
 *
 * @return The next value in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}
| 98 | |
/* ============================================================================
  Utility vector template classes with basic operations
============================================================================ */
| 102 | |
/**
 * @brief A four-component vector with public members x, y, z and w.
 *
 * Copy construction and copy assignment are left to the compiler. The
 * implicit versions perform the same member-wise copy as a hand-written
 * version would, and they keep the type trivially copyable whenever T is.
 *
 * @tparam T The component type.
 */
template <typename T> class vtype4
{
public:
    T x, y, z, w;

    /** @brief Construct a vector whose components are default-initialized
     *         (indeterminate values for arithmetic T). */
    vtype4() {}

    /** @brief Construct a vector from four components, in x, y, z, w order. */
    vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {}
};
| 118 | |
| 119 | typedef vtype4<int> int4; |
| 120 | typedef vtype4<unsigned int> uint4; |
| 121 | |
| 122 | static inline int4 operator+(int4 p, int4 q) { return int4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
| 123 | static inline uint4 operator+(uint4 p, uint4 q) { return uint4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); } |
| 124 | |
| 125 | static inline int4 operator-(int4 p, int4 q) { return int4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
| 126 | static inline uint4 operator-(uint4 p, uint4 q) { return uint4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); } |
| 127 | |
| 128 | static inline int4 operator*(int4 p, int4 q) { return int4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
| 129 | static inline uint4 operator*(uint4 p, uint4 q) { return uint4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); } |
| 130 | |
| 131 | static inline int4 operator*(int4 p, int q) { return int4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
| 132 | static inline uint4 operator*(uint4 p, uint32_t q) { return uint4( p.x * q, p.y * q, p.z * q, p.w * q ); } |
| 133 | |
| 134 | static inline int4 operator*(int p, int4 q) { return q * p; } |
| 135 | static inline uint4 operator*(uint32_t p, uint4 q) { return q * p; } |
| 136 | |
// Minimum / maximum helper macros, defined only when no earlier definition
// exists. Each expands to a conditional expression, so the selected argument
// is evaluated twice; avoid passing expressions with side effects.
#if !defined(MIN)
    #define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif

#if !defined(MAX)
    #define MAX(x, y) ((x) > (y) ? (x) : (y))
#endif
| 144 | |
/* ============================================================================
  Softfloat library with fp32 and fp16 conversion functionality.
============================================================================ */
/**
 * @brief Union overlaying a uint32_t, an int32_t and a float in the same
 *        storage.
 */
union if32_
{
    uint32_t u;  // unsigned integer member
    int32_t s;   // signed integer member
    float f;     // float member
};

// Short name for the union type.
using if32 = if32_;
| 154 | |
// NOTE(review): presumably returns the number of leading zero bits in p; the
// definition is not in this header, so confirm against the implementation.
uint32_t clz32(uint32_t p);

/* Sized soft-float types. These map onto the fixed-width integer types rather
   than the language's floating-point types, because the library needs to
   maintain exact, bit-level control over all operations on these data
   types. */
using sf16 = uint16_t;
using sf32 = uint32_t;

/* Widening float->float conversion. */
sf32 sf16_to_sf32(sf16);

/* Conversion from an sf16 value to a native float. */
float sf16_to_float(sf16);
| 168 | |
| 169 | #endif |