// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
JPEG XL226f84e2020-11-11 20:22:00 +010015#ifndef HIGHWAY_HWY_BASE_H_
16#define HIGHWAY_HWY_BASE_H_
Jan Wassenberg94a72d02020-10-29 18:04:03 +010017
18// For SIMD module implementations and their callers, target-independent.
19
20#include <stddef.h>
21#include <stdint.h>
22
23#include <atomic>
24
// HWY_IDE is 1 if any of the IDE/code-model parser macros checked below is
// defined, otherwise 0. Add to #if conditions to prevent IDE from graying out
// code.
#if (defined __CDT_PARSER__) || (defined __INTELLISENSE__) || \
    (defined Q_CREATOR_RUN) || (defined(__CLANGD__))
#define HWY_IDE 1
#else
#define HWY_IDE 0
#endif
32
//------------------------------------------------------------------------------
// Detect compiler using predefined macros
//
// Every HWY_COMPILER_* macro is always defined: it is nonzero if the
// corresponding compiler-identification macro is predefined and 0 otherwise,
// so it can be used directly in `#if` and in numeric version comparisons.

// Value of _MSC_VER, or 0.
#ifdef _MSC_VER
#define HWY_COMPILER_MSVC _MSC_VER
#else
#define HWY_COMPILER_MSVC 0
#endif

// Value of __INTEL_COMPILER, or 0.
#ifdef __INTEL_COMPILER
#define HWY_COMPILER_ICC __INTEL_COMPILER
#else
#define HWY_COMPILER_ICC 0
#endif

// major * 100 + minor, or 0.
#ifdef __GNUC__
#define HWY_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
#else
#define HWY_COMPILER_GCC 0
#endif

// major * 100 + minor, or 0.
// Clang can masquerade as MSVC/GCC, in which case both are set.
#ifdef __clang__
#define HWY_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
#else
#define HWY_COMPILER_CLANG 0
#endif

// More than one may be nonzero, but we want at least one.
#if !HWY_COMPILER_MSVC && !HWY_COMPILER_ICC && !HWY_COMPILER_GCC && \
    !HWY_COMPILER_CLANG
#error "Unsupported compiler"
#endif
66
//------------------------------------------------------------------------------
// Compiler-specific definitions

// HWY_STR_IMPL turns its argument into a string literal exactly as written.
// HWY_STR adds one level of indirection so that a macro argument is expanded
// first and its expansion is stringified.
#define HWY_STR_IMPL(macro) #macro
#define HWY_STR(macro) HWY_STR_IMPL(macro)
72
// Portable spellings of compiler extensions. The first branch uses the MSVC
// keywords/declspecs, the second the GCC/Clang attributes and builtins.
//
//   HWY_RESTRICT         no-alias pointer qualifier
//   HWY_INLINE           force inlining
//   HWY_NOINLINE         prevent inlining
//   HWY_FLATTEN          flatten attribute (empty for MSVC)
//   HWY_NORETURN         function does not return
//   HWY_LIKELY/UNLIKELY  branch hints (plain `(expr)` for MSVC)
//   HWY_PRAGMA           pragma usable inside macros
//   HWY_DIAGNOSTICS      warning-control pragma for the current compiler
//   HWY_DIAGNOSTICS_OFF  takes both spellings, uses the one that applies
//   HWY_MAYBE_UNUSED     suppress unused warnings (empty for MSVC)
//   HWY_MUST_USE_RESULT  warn if the return value is ignored
#if HWY_COMPILER_MSVC

#include <intrin.h>

#define HWY_RESTRICT __restrict
#define HWY_INLINE __forceinline
#define HWY_NOINLINE __declspec(noinline)
#define HWY_FLATTEN
#define HWY_NORETURN __declspec(noreturn)
#define HWY_LIKELY(expr) (expr)
#define HWY_UNLIKELY(expr) (expr)
#define HWY_PRAGMA(tokens) __pragma(tokens)
#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
#define HWY_MAYBE_UNUSED
#define HWY_HAS_ASSUME_ALIGNED 0
#if (_MSC_VER >= 1700)
#define HWY_MUST_USE_RESULT _Check_return_
#else
#define HWY_MUST_USE_RESULT
#endif

#else

#define HWY_RESTRICT __restrict__
#define HWY_INLINE inline __attribute__((always_inline))
#define HWY_NOINLINE __attribute__((noinline))
#define HWY_FLATTEN __attribute__((flatten))
#define HWY_NORETURN __attribute__((noreturn))
// !! normalizes the condition to 0/1 before it is compared with the hint.
#define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
#define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
#define HWY_PRAGMA(tokens) _Pragma(#tokens)
#define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
#define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
// Encountered "attribute list cannot appear here" when using the C++17
// [[maybe_unused]], so only use the old style attribute for now.
#define HWY_MAYBE_UNUSED __attribute__((unused))
#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))

#endif  // !HWY_COMPILER_MSVC
113
//------------------------------------------------------------------------------
// Builtin/attributes

// HWY_HAS_BUILTIN(name) forwards to __has_builtin if the compiler provides
// it, otherwise it evaluates to 0.
#ifdef __has_builtin
#define HWY_HAS_BUILTIN(name) __has_builtin(name)
#else
#define HWY_HAS_BUILTIN(name) 0
#endif

// HWY_HAS_ATTRIBUTE(name) forwards to __has_attribute if the compiler
// provides it, otherwise it evaluates to 0.
#ifdef __has_attribute
#define HWY_HAS_ATTRIBUTE(name) __has_attribute(name)
#else
#define HWY_HAS_ATTRIBUTE(name) 0
#endif

// Enables error-checking of format strings.
// idx_fmt and idx_arg are the parameter indices passed to the printf-style
// __format__ attribute. Expands to nothing if the attribute is unavailable.
#if HWY_HAS_ATTRIBUTE(__format__)
#define HWY_FORMAT(idx_fmt, idx_arg) \
  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
#else
#define HWY_FORMAT(idx_fmt, idx_arg)
#endif

// Returns a void* pointer which the compiler then assumes is N-byte aligned.
// Example: float* HWY_RESTRICT aligned = (float*)HWY_ASSUME_ALIGNED(in, 32);
//
// The assignment semantics are required by GCC/Clang. ICC provides an in-place
// __assume_aligned, whereas MSVC's __assume appears unsuitable.
// If the builtin is unavailable, the macro simply yields `ptr` unchanged.
#if HWY_HAS_BUILTIN(__builtin_assume_aligned)
#define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
#else
#define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
#endif
147
// Clang and GCC require attributes on each function into which SIMD intrinsics
// are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
// automatic annotation via pragmas.
//
// HWY_PUSH_ATTRIBUTES(targets_str) starts a region in which `targets_str` is
// applied as the target attribute/option; HWY_POP_ATTRIBUTES ends it. Both
// expand to nothing for other compilers.
#if HWY_COMPILER_CLANG
#define HWY_PUSH_ATTRIBUTES(targets_str)                                \
  HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
                                  apply_to = function))
#define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
#elif HWY_COMPILER_GCC
#define HWY_PUSH_ATTRIBUTES(targets_str) \
  HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
#define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
#else
#define HWY_PUSH_ATTRIBUTES(targets_str)
#define HWY_POP_ATTRIBUTES
#endif
164
//------------------------------------------------------------------------------
// Detect architecture using predefined macros
//
// Every HWY_ARCH_* macro is always defined as 0 or 1, so it can be used
// directly in `#if` and summed in the sanity check at the end.

#if defined(__i386__) || defined(_M_IX86)
#define HWY_ARCH_X86_32 1
#else
#define HWY_ARCH_X86_32 0
#endif

#if defined(__x86_64__) || defined(_M_X64)
#define HWY_ARCH_X86_64 1
#else
#define HWY_ARCH_X86_64 0
#endif

// Either 32-bit or 64-bit x86.
#if HWY_ARCH_X86_32 || HWY_ARCH_X86_64
#define HWY_ARCH_X86 1
#else
#define HWY_ARCH_X86 0
#endif

#if defined(__powerpc64__) || defined(_M_PPC)
#define HWY_ARCH_PPC 1
#else
#define HWY_ARCH_PPC 0
#endif

// _M_ARM64 is what MSVC predefines for 64-bit ARM; without it, that target
// would match no architecture and trip the #error below.
#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
    defined(_M_ARM64)
#define HWY_ARCH_ARM 1
#else
#define HWY_ARCH_ARM 0
#endif

// There isn't yet a standard __wasm or __wasm__.
#ifdef __EMSCRIPTEN__
#define HWY_ARCH_WASM 1
#else
#define HWY_ARCH_WASM 0
#endif

#ifdef __riscv
#define HWY_ARCH_RVV 1
#else
#define HWY_ARCH_RVV 0
#endif

// Exactly one of the top-level architectures must have been detected.
#if (HWY_ARCH_X86 + HWY_ARCH_PPC + HWY_ARCH_ARM + HWY_ARCH_WASM + \
     HWY_ARCH_RVV) != 1
#error "Must detect exactly one platform"
#endif
215
//------------------------------------------------------------------------------
// Macros

// Declaration prefix for the inline helper functions in this header: combines
// `static` with HWY_INLINE, HWY_FLATTEN and HWY_MAYBE_UNUSED.
#define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
220
// Token pasting. HWY_CONCAT adds a level of indirection so that macro
// arguments are expanded before HWY_CONCAT_IMPL joins them.
#define HWY_CONCAT_IMPL(a, b) a##b
#define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)

// Smaller/larger of two values. The selected argument is evaluated twice, so
// avoid arguments with side effects.
#define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
#define HWY_MAX(a, b) ((a) < (b) ? (b) : (a))

// Alternative for asm volatile("" : : : "memory"), which has no effect.
#define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)

// 4 instances of a given literal value, useful as input to LoadDup128.
#define HWY_REP4(literal) literal, literal, literal, literal
232
// Calls ::hwy::Abort with the current __FILE__ and __LINE__, followed by a
// printf-style format string and its optional arguments.
#define HWY_ABORT(format, ...) \
  ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)

// Always enabled. Aborts with the stringified condition if it is false.
// Wrapped in do/while(0) so that it behaves like a single statement.
#define HWY_ASSERT(condition)             \
  do {                                    \
    if (!(condition)) {                   \
      HWY_ABORT("Assert %s", #condition); \
    }                                     \
  } while (0)

// Only for "debug" builds: same as HWY_ASSERT if NDEBUG is not defined or one
// of the sanitizer macros below is defined. Otherwise it expands to an empty
// statement and `condition` is not evaluated.
#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || \
    defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER)
#define HWY_DASSERT(condition) HWY_ASSERT(condition)
#else
#define HWY_DASSERT(condition) \
  do {                         \
  } while (0)
#endif
253
254//------------------------------------------------------------------------------
255
256namespace hwy {
257
// See also HWY_ALIGNMENT - aligned_allocator aligns to the larger of that and
// the vector size, whose upper bound is specified here.
//
// kMaxVectorSize is that upper bound; HWY_ALIGN_MAX is an alignas specifier
// with the same value.
#if HWY_ARCH_X86
static constexpr size_t kMaxVectorSize = 64;  // AVX-512
#define HWY_ALIGN_MAX alignas(64)
#else
static constexpr size_t kMaxVectorSize = 16;
#define HWY_ALIGN_MAX alignas(16)
#endif
268
269HWY_NORETURN void HWY_FORMAT(3, 4)
270 Abort(const char* file, int line, const char* format, ...);
271
// Returns true if T can hold a fractional value: 1.25 converted to T must
// differ from 1. This is false for integer types, where the conversion
// truncates to 1.
template <typename T>
constexpr bool IsFloat() {
  return T(1.25) != T(1);
}
276
// Returns true if T can represent negative values: -1 converted to T must
// compare less than 0. For unsigned types, T(-1) wraps around to the largest
// value, so the comparison is false.
template <typename T>
constexpr bool IsSigned() {
  return T(0) > T(-1);
}
281
282// Largest/smallest representable integer values.
283template <typename T>
284constexpr T LimitsMax() {
285 return IsSigned<T>() ? T((1ULL << (sizeof(T) * 8 - 1)) - 1)
286 : static_cast<T>(~0ull);
287}
288template <typename T>
289constexpr T LimitsMin() {
290 return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
291}
292
// Manual control of overload resolution (SFINAE).
// The primary template has no members; only the specialization for a true
// Condition provides the member `type` (an alias for T).
template <bool Condition, class T>
struct EnableIfT {};
template <class T>
struct EnableIfT<true, T> {
  using type = T;
};

// Alias for T (void by default) if Condition is true. If it is false, there
// is no `type` member, so forming the alias is a substitution failure.
template <bool Condition, class T = void>
using EnableIf = typename EnableIfT<Condition, T>::type;
303
// Returns a / b rounded up, computed as (a + b - 1) / b and converted to T1.
// This equals the ceiling for non-negative integer a and positive integer b,
// provided a + b - 1 does not overflow. b must be nonzero.
template <typename T1, typename T2>
constexpr inline T1 DivCeil(T1 a, T2 b) {
  return (a + b - 1) / b;
}
308
309// Works for any `align`; if a power of two, compiler emits ADD+AND.
310constexpr inline size_t RoundUpTo(size_t what, size_t align) {
311 return DivCeil(what, align) * align;
312}
313
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100314// Undefined results for x == 0.
Jan Wassenberg586d0c22021-01-06 09:09:54 -0800315HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) {
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100316#ifdef _MSC_VER
Jan Wassenberg0034dac2021-01-07 01:18:02 -0800317 unsigned long index; // NOLINT
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100318 _BitScanForward(&index, x);
319 return index;
320#else
321 return static_cast<size_t>(__builtin_ctz(x));
322#endif
323}
324
Jan Wassenberg586d0c22021-01-06 09:09:54 -0800325HWY_API size_t PopCount(uint64_t x) {
326#if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
327 return static_cast<size_t>(__builtin_popcountll(x));
328#elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
329 return _mm_popcnt_u64(x);
330#elif HWY_COMPILER_MSVC
331 return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32));
332#else
333 x -= ((x >> 1) & 0x55555555U);
334 x = (((x >> 2) & 0x33333333U) + (x & 0x33333333U));
335 x = (((x >> 4) + x) & 0x0F0F0F0FU);
336 x += (x >> 8);
337 x += (x >> 16);
338 x += (x >> 32);
339 x = x & 0x0000007FU;
340 return (unsigned int)x;
341#endif
342}
343
344// The source/destination must not overlap/alias.
345template <size_t kBytes, typename From, typename To>
346HWY_API void CopyBytes(const From* from, To* to) {
347#if HWY_COMPILER_MSVC
348 const uint8_t* HWY_RESTRICT from_bytes =
349 reinterpret_cast<const uint8_t*>(from);
350 uint8_t* HWY_RESTRICT to_bytes = reinterpret_cast<uint8_t*>(to);
351 for (size_t i = 0; i < kBytes; ++i) {
352 to_bytes[i] = from_bytes[i];
353 }
354#else
355 // Avoids horrible codegen on Clang (series of PINSRB)
356 __builtin_memcpy(to, from, kBytes);
357#endif
358}
359
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100360} // namespace hwy
361
JPEG XL226f84e2020-11-11 20:22:00 +0100362#endif // HIGHWAY_HWY_BASE_H_