Blame - third_party/astc-encoder/Source/astc_mathlib.h - swiftshader.googlesource.com/SwiftShader

blob: 7f504e9d787f4048675d261326fd1f385f812455 [file] [log] [blame]

Alexis Hetu	1b90087	2020-02-24 12:09:16 -0500	[diff] [blame^]	1	// SPDX-License-Identifier: Apache-2.0
				2	// ----------------------------------------------------------------------------
				3	// Copyright 2011-2020 Arm Limited
				4	//
				5	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
				6	// use this file except in compliance with the License. You may obtain a copy
				7	// of the License at:
				8	//
				9	// http://www.apache.org/licenses/LICENSE-2.0
				10	//
				11	// Unless required by applicable law or agreed to in writing, software
				12	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				13	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
				14	// License for the specific language governing permissions and limitations
				15	// under the License.
				16	// ----------------------------------------------------------------------------
				17
				18	/*
				19	* This module implements a variety of mathematical data types and library
				20	* functions used by the codec.
				21	*/
				22
				23	#ifndef ASTC_MATHLIB_H_INCLUDED
				24	#define ASTC_MATHLIB_H_INCLUDED
				25
				26	#include <cmath>
				27	#include <cstdint>
				28
				29	#ifndef M_PI
				30	#define M_PI 3.14159265358979323846
				31	#endif
				32
				33	/* ============================================================================
				34	Fast math library; note that many of the higher-order functions in this set
				35	use approximations which are less accurate, but faster, than <cmath> standard
				36	library equivalents.
				37
				38	Note: Many of these are not necessarily faster than simple C versions when
				39	used on a single scalar value, but are included for testing purposes as most
				40	have an option based on SSE intrinsics and therefore provide an obvious route
				41	to future vectorization.
				42	============================================================================ */
				43
				44	// We support scalar versions of many maths functions which use SSE intrinsics
				45	// as an "optimized" path, using just one lane from the SIMD hardware. In
				46	// reality these are often slower than standard C due to setup and scheduling
				47	// overheads, and the fact that we're not offsetting that cost with any actual
				48	// vectorization.
				49	//
				50	// These variants are only included as a means to test that the accuracy of an
				51	// SSE implementation would be acceptable before refactoring code paths to use
				52	// an actual vectorized implementation which gets some advantage from SSE. It
				53	// is therefore expected that the code will go slower with this macro
				54	// set to 1 ...
				55	#define USE_SCALAR_SSE 0
				56
				57	// These are namespaced to avoid colliding with C standard library functions.
				58	namespace astc
				59	{
				60
				61	/**
				62	* @brief Test if a float value is a NaN.
				63	*
				64	* @param val The value to test.
				65	*
				66	* @return Zero if val is not a NaN, non-zero otherwise.
				67	*/
				68	static inline int isnan(float val)
				69	{
				70	return val != val; // A NaN is the only float value that compares unequal to itself.
				71	}
				72
				73	/**
				74	* @brief Initialize the seed structure for a random number generator.
				75	*
				76	* Important note: For the purposes of ASTC we want sets of random numbers to
				77	* use in the codec, but we want the same seed value across instances and
				78	* threads to ensure that image output is stable across compressor runs and
				79	* across platforms. Every PRNG created by this call will therefore return the
				80	* same sequence of values ...
				81	*
				82	* @param state The two-element state structure to initialize.
				83	*/
				84	void rand_init(uint64_t state[2]);
				85
				86	/**
				87	* @brief Return the next 64-bit random number from the generator.
				88	*
				89	* This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
				90	* public-domain implementation given by David Blackman & Sebastiano Vigna at
				91	* http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
				92	*
				93	* @param state The two-element state structure to use/update.
				94	*/
				95	uint64_t rand(uint64_t state[2]);
				96
				97	} // namespace astc
				98
				99	/* ============================================================================
				100	Utility vector template classes with basic operations
				101	============================================================================ */
				102
				103	template <typename T> class vtype4 // Four-component vector (x, y, z, w) with elements of type T.
				104	{
				105	public:
				106	T x, y, z, w; // The four components, publicly accessible.
				107	vtype4() {} // Default constructor: performs no explicit initialization of the components.
				108	vtype4(T p, T q, T r, T s) : x(p), y(q), z(r), w(s) {} // Construct from four values, assigned to x, y, z, w in order.
				109	vtype4(const vtype4 & p) : x(p.x), y(p.y), z(p.z), w(p.w) {} // Copy constructor: component-wise copy.
				110	vtype4 &operator =(const vtype4 &s) { // Copy assignment: component-wise copy; returns *this for chaining.
				111	this->x = s.x;
				112	this->y = s.y;
				113	this->z = s.z;
				114	this->w = s.w;
				115	return *this;
				116	}
				117	};
				118
				119	typedef vtype4<int> int4; // Four-component vector of int.
				120	typedef vtype4<unsigned int> uint4; // Four-component vector of unsigned int.
				121	// Component-wise addition: each result component is the sum of the matching components of p and q.
				122	static inline int4 operator+(int4 p, int4 q) { return int4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }
				123	static inline uint4 operator+(uint4 p, uint4 q) { return uint4( p.x + q.x, p.y + q.y, p.z + q.z, p.w + q.w ); }
				124	// Component-wise subtraction: each result component is the matching component of p minus that of q.
				125	static inline int4 operator-(int4 p, int4 q) { return int4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }
				126	static inline uint4 operator-(uint4 p, uint4 q) { return uint4( p.x - q.x, p.y - q.y, p.z - q.z, p.w - q.w ); }
				127	// Component-wise multiplication: each result component is the product of the matching components of p and q.
				128	static inline int4 operator*(int4 p, int4 q) { return int4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }
				129	static inline uint4 operator*(uint4 p, uint4 q) { return uint4( p.x * q.x, p.y * q.y, p.z * q.z, p.w * q.w ); }
				130	// Vector-by-scalar multiplication: every component of p is multiplied by the scalar q.
				131	static inline int4 operator*(int4 p, int q) { return int4( p.x * q, p.y * q, p.z * q, p.w * q ); }
				132	static inline uint4 operator*(uint4 p, uint32_t q) { return uint4( p.x * q, p.y * q, p.z * q, p.w * q ); }
				133	// Scalar-by-vector multiplication: delegates to the vector-by-scalar overloads with the operands swapped.
				134	static inline int4 operator*(int p, int4 q) { return q * p; }
				135	static inline uint4 operator*(uint32_t p, uint4 q) { return q * p; }
				136
				137	#ifndef MIN // Smaller of x and y; only defined here if MIN is not already defined.
				138	#define MIN(x,y) ((x)<(y)?(x):(y))
				139	#endif
				140	// Note: MIN and MAX may each evaluate one of their arguments twice, so avoid arguments with side effects.
				141	#ifndef MAX // Larger of x and y; only defined here if MAX is not already defined.
				142	#define MAX(x,y) ((x)>(y)?(x):(y))
				143	#endif
				144
				145	/* ============================================================================
				146	Softfloat library with fp32 and fp16 conversion functionality.
				147	============================================================================ */
				148	typedef union if32_ // Overlays a 32-bit unsigned integer, a 32-bit signed integer and a float in the same storage.
				149	{
				150	uint32_t u; // Unsigned 32-bit integer view.
				151	int32_t s; // Signed 32-bit integer view.
				152	float f; // Floating-point view.
				153	} if32;
				154
				155	uint32_t clz32(uint32_t p); // NOTE(review): presumably counts the leading zero bits of p; defined elsewhere - confirm.
				156
				157	/* Sized soft-float types. These are mapped to the sized integer
				158	types of C99, instead of C's floating-point types; this is because
				159	the library needs to maintain exact, bit-level control on all
				160	operations on these data types. */
				161	typedef uint16_t sf16; // Soft-float value held in a 16-bit unsigned integer.
				162	typedef uint32_t sf32; // Soft-float value held in a 32-bit unsigned integer.
				163
				164	/* Widening float->float conversions */
				165	sf32 sf16_to_sf32(sf16); // Takes an sf16 and returns an sf32; defined elsewhere.
				166	/* Conversion from an sf16 to the native float type; defined elsewhere. */
				167	float sf16_to_float(sf16);
				168
				169	#endif