blob: 09ebe5ab02844ce0227d7004f3640f3622760123 [file] [log] [blame]
Jan Wassenberg94a72d02020-10-29 18:04:03 +01001// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Jan Wassenbergb97d18f2020-11-11 17:53:22 +010015// This include guard is checked by foreach_target, so avoid the usual _H_
16// suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
17// after/outside this include guard.
18#ifndef HWY_HIGHWAY_INCLUDED
19#define HWY_HIGHWAY_INCLUDED
Jan Wassenberg94a72d02020-10-29 18:04:03 +010020
Jan Wassenbergb97d18f2020-11-11 17:53:22 +010021// Main header required before using vector types.
Jan Wassenberg94a72d02020-10-29 18:04:03 +010022
23#include "hwy/targets.h"
24
25namespace hwy {
26
// API version (https://semver.org/), split into its three numeric components:
// HWY_MAJOR.HWY_MINOR.HWY_PATCH.
#define HWY_MAJOR 0
#define HWY_MINOR 7
#define HWY_PATCH 0
31
Jan Wassenberg94a72d02020-10-29 18:04:03 +010032//------------------------------------------------------------------------------
33// Shorthand for descriptors (defined in shared-inl.h) used to select overloads.
34
35// Because Highway functions take descriptor and/or vector arguments, ADL finds
36// these functions without requiring users in project::HWY_NAMESPACE to
37// qualify Highway functions with hwy::HWY_NAMESPACE. However, ADL rules for
38// templates require `using hwy::HWY_NAMESPACE::ShiftLeft;` etc. declarations.
39
// Full (native-width) vector: descriptor type Simd<T, N> in the current
// target's namespace, with N = HWY_LANES(T).
#define HWY_FULL(T) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T)>

// Vector of up to MAX_N lanes: the lane count is HWY_MIN(MAX_N, HWY_LANES(T)),
// i.e. MAX_N only ever reduces the lane count relative to HWY_FULL(T).
#define HWY_CAPPED(T, MAX_N) \
  hwy::HWY_NAMESPACE::Simd<T, HWY_MIN(MAX_N, HWY_LANES(T))>
46
47//------------------------------------------------------------------------------
48// Export user functions for static/dynamic dispatch
49
// HWY_ONCE evaluates to 0 while a translation unit is generating code for any
// target other than the static one (which comes last when several targets are
// enabled). It is used to prevent redefinitions of HWY_EXPORT. If
// foreach_target.h is not and has not been included, the translation unit is
// only compiled once, so the default of 1 provided here applies.
#if !defined(HWY_ONCE)
#define HWY_ONCE 1
#endif
Jan Wassenberg94a72d02020-10-29 18:04:03 +010057
// HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
// HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
// defined), and can be used to deduce the return type of Choose*.
//
// Every target that can be the static target needs a branch here: HWY_EXPORT
// expands HWY_STATIC_DISPATCH, so a missing branch leaves the macro undefined
// and breaks compilation of every exported function for that target. The
// namespace names match those used by the HWY_CHOOSE_* macros.
#if HWY_STATIC_TARGET == HWY_SCALAR
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_WASM
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_RVV
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_NEON
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_PPC8
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SSE4
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_AVX2
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_AVX3
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
#endif
76
77// Dynamic dispatch declarations.
78
79template <typename RetType, typename... Args>
80struct FunctionCache {
81 public:
82 typedef RetType(FunctionType)(Args...);
83
84 // A template function that when instantiated has the same signature as the
85 // function being called. This function initializes the global cache of the
86 // current supported targets mask used for dynamic dispatch and calls the
87 // appropriate function. Since this mask used for dynamic dispatch is a
88 // global cache, all the highway exported functions, even those exposed by
89 // different modules, will be initialized after this function runs for any one
90 // of those exported functions.
91 template <FunctionType* const table[]>
92 static RetType ChooseAndCall(Args... args) {
93 // If we are running here it means we need to update the chosen target.
94 chosen_target.Update();
95 return (table[chosen_target.GetIndex()])(args...);
96 }
97};
98
99// Factory function only used to infer the template parameters RetType and Args
100// from a function passed to the factory.
101template <typename RetType, typename... Args>
102FunctionCache<RetType, Args...> FunctionCacheFactory(RetType (*)(Args...)) {
103 return FunctionCache<RetType, Args...>();
104}
105
// HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
// nullptr if that target was not compiled.
// Scalar entry of the dispatch table. This macro always expands to a valid
// function address, never to nullptr.
#if (HWY_TARGETS & HWY_SCALAR) != 0
#define HWY_CHOOSE_SCALAR(FUNC_NAME) &N_SCALAR::FUNC_NAME
#else
// Scalar was not compiled, yet it may still be requested when the other
// targets were disabled at runtime; in that case fall back to the baseline
// via HWY_STATIC_DISPATCH().
#define HWY_CHOOSE_SCALAR(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
#endif
116
// Dispatch-table entries for WASM, RVV and NEON. Each macro yields the address
// of that target's FUNC_NAME when the target's bit is set in HWY_TARGETS, and
// nullptr when it is not.
#if (HWY_TARGETS & HWY_WASM) != 0
#define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
#else
#define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
#endif

#if (HWY_TARGETS & HWY_RVV) != 0
#define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
#else
#define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
#endif

#if (HWY_TARGETS & HWY_NEON) != 0
#define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
#else
#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
#endif
134
// PPC8 entry of the dispatch table: the address of the N_PPC8 implementation
// of FUNC_NAME when the PPC8 bit is set in HWY_TARGETS, otherwise nullptr.
// Both branches must define the same name (HWY_CHOOSE_PPC8); otherwise the
// macro is missing exactly when the target is compiled.
#if HWY_TARGETS & HWY_PPC8
#define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
#else
#define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
#endif
140
// Dispatch-table entries for the x86 targets SSE4, AVX2 and AVX3. Each macro
// yields the address of that target's FUNC_NAME when the target's bit is set
// in HWY_TARGETS, and nullptr when it is not.
#if (HWY_TARGETS & HWY_SSE4) != 0
#define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
#else
#define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
#endif

#if (HWY_TARGETS & HWY_AVX2) != 0
#define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
#else
#define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
#endif

#if (HWY_TARGETS & HWY_AVX3) != 0
#define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
#else
#define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
#endif
158
// Identifier of the dispatch table belonging to FUNC_NAME, built by passing
// FUNC_NAME and the suffix HighwayDispatchTable to HWY_CONCAT.
// NOTE(review): HWY_CONCAT is not defined in this part of the file; it is
// presumably a token-pasting helper from another header - confirm.
#define HWY_DISPATCH_TABLE(FUNC_NAME) \
  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
161
162// HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
163// HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
164// static array must be defined at the same namespace level as the function
165// it is exporting.
// After being exported, it can be called from other parts of the same source
// file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
// like in the following example:
169//
170// #include "hwy/highway.h"
171// HWY_BEFORE_NAMESPACE();
172// namespace skeleton {
173// namespace HWY_NAMESPACE {
174//
175// void MyFunction(int a, char b, const char* c) { ... }
176//
177// // NOLINTNEXTLINE(google-readability-namespace-comments)
178// } // namespace HWY_NAMESPACE
179// } // namespace skeleton
180// HWY_AFTER_NAMESPACE();
181//
182// namespace skeleton {
183// HWY_EXPORT(MyFunction); // Defines the dispatch table in this scope.
184//
185// void MyFunction(int a, char b, const char* c) {
186// return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
187// }
188// } // namespace skeleton
189//
190
// (HWY_TARGETS & (HWY_TARGETS - 1)) == 0 holds when at most one bit is set in
// HWY_TARGETS, i.e. when no more than one target is being compiled.
#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)

// Simplified version for IDE or the dynamic dispatch case with only one target.
// This case still uses a table, although of a single element, to provide the
// same compile error conditions as with the dynamic dispatch case when multiple
// targets are being compiled.
// The single entry is the static-target implementation of FUNC_NAME, and
// HWY_DYNAMIC_DISPATCH always calls through index 0.
#define HWY_EXPORT(FUNC_NAME) \
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
      FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) (*(HWY_DISPATCH_TABLE(FUNC_NAME)[0]))

#else

// Dynamic dispatch case with one entry per dynamic target plus the scalar
// mode and the initialization wrapper.
// Table layout (HWY_MAX_DYNAMIC_TARGETS + 2 entries): the ChooseAndCall
// wrapper first, then the entries produced by HWY_CHOOSE_TARGET_LIST, then the
// scalar entry last. The element type is the pointer type of the
// static-target implementation, so all targets must share that signature.
// NOTE(review): chosen_target.GetIndex() presumably selects the first entry
// until Update() has run - confirm against hwy/targets.h.
#define HWY_EXPORT(FUNC_NAME) \
  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) \
      const HWY_DISPATCH_TABLE(FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
          /* The first entry in the table initializes the global cache and \
           * calls the appropriate function. */ \
          &decltype(hwy::FunctionCacheFactory(&HWY_STATIC_DISPATCH( \
              FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
          HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
          HWY_CHOOSE_SCALAR(FUNC_NAME), \
  }
// Calls the table entry currently selected by hwy::chosen_target.
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::chosen_target.GetIndex()]))

#endif  // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
220
221} // namespace hwy
222
Jan Wassenbergb97d18f2020-11-11 17:53:22 +0100223#endif // HWY_HIGHWAY_INCLUDED
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100224
225//------------------------------------------------------------------------------
226
227// NOTE: ops/*.h cannot use regular include guards because their definitions
228// depend on HWY_TARGET, e.g. enabling AVX3 instructions on 128-bit vectors, so
229// we want to include them once per target. However, each *-inl.h includes
230// highway.h, so we still need an external per-target include guard.
// Per-target include guard. The remainder of the file is processed only when
// HWY_HIGHWAY_PER_TARGET and HWY_TARGET_TOGGLE are either both defined or both
// undefined. HWY_HIGHWAY_PER_TARGET is then flipped immediately, so further
// inclusions are skipped until HWY_TARGET_TOGGLE changes state as well.
// NOTE(review): HWY_TARGET_TOGGLE is not defined in this file; presumably it
// is flipped once per target by foreach_target.h - confirm.
// The matching #endif for the outer #if is at the end of the file.
#if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_HIGHWAY_PER_TARGET
#undef HWY_HIGHWAY_PER_TARGET
#else
#define HWY_HIGHWAY_PER_TARGET
#endif
237
238// These define ops inside namespace hwy::HWY_NAMESPACE.
239#if HWY_TARGET == HWY_SSE4
240#include "hwy/ops/x86_128-inl.h"
241#elif HWY_TARGET == HWY_AVX2
242#include "hwy/ops/x86_256-inl.h"
243#elif HWY_TARGET == HWY_AVX3
244#include "hwy/ops/x86_512-inl.h"
245#elif HWY_TARGET == HWY_PPC8
246#elif HWY_TARGET == HWY_NEON
247#include "hwy/ops/arm_neon-inl.h"
248#elif HWY_TARGET == HWY_WASM
249#include "hwy/ops/wasm_128-inl.h"
Jan Wassenberg0034dac2021-01-07 01:18:02 -0800250#elif HWY_TARGET == HWY_RVV
251// TODO(janwas): header
252#include "hwy/ops/shared-inl.h"
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100253#elif HWY_TARGET == HWY_SCALAR
254#include "hwy/ops/scalar-inl.h"
255#else
256#pragma message("HWY_TARGET does not match any known target")
257#endif // HWY_TARGET
258
Jan Wassenbergb97d18f2020-11-11 17:53:22 +0100259// Commonly used functions/types that must come after ops are defined.
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100260HWY_BEFORE_NAMESPACE();
261namespace hwy {
262namespace HWY_NAMESPACE {
263
// The lane type of a vector type, e.g. float for Vec<Simd<float, 4>>.
// Deduced as the return type of GetLane when called with a value-initialized
// V; the expression is only used inside decltype and is never evaluated.
template <class V>
using LaneType = decltype(GetLane(V()));
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100267
// Vector type, e.g. Vec128<float> for Simd<float, 4>. Useful as the return type
// of functions that do not take a vector argument, or as an argument type if
// the function only has a template argument for D, or for explicit type names
// instead of auto. This may be a built-in type.
// Deduced as the return type of Zero when called with a value-initialized
// descriptor D.
template <class D>
using Vec = decltype(Zero(D()));
274
// Mask type. Useful as the return type of functions that do not take a mask
// argument, or as an argument type if the function only has a template argument
// for D, or for explicit type names instead of auto.
// Deduced as the return type of MaskFromVec applied to the result of
// Zero(D()).
template <class D>
using Mask = decltype(MaskFromVec(Zero(D())));
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100280
Jan Wassenbergbc4a79e2020-12-02 06:46:11 -0800281// Returns the closest value to v within [lo, hi].
282template <class V>
283HWY_API V Clamp(const V v, const V lo, const V hi) {
284 return Min(Max(lo, v), hi);
285}
286
// CombineShiftRightBytes (and ..Lanes) are not available for the scalar target.
// TODO(janwas): implement for RVV
#if !(HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_RVV)

// Lane-granularity wrapper around CombineShiftRightBytes: converts the lane
// count kLanes into a byte count using the size of V's lane type and forwards
// hi and lo unchanged.
template <size_t kLanes, class V>
HWY_API V CombineShiftRightLanes(const V hi, const V lo) {
  constexpr size_t kBytes = kLanes * sizeof(LaneType<V>);
  return CombineShiftRightBytes<kBytes>(hi, lo);
}

#endif  // HWY_TARGET != HWY_SCALAR && HWY_TARGET != HWY_RVV
297
Jan Wassenberg6a880832020-12-03 08:41:16 -0800298// Returns lanes with the most significant bit set and all other bits zero.
299template <class D>
300HWY_API Vec<D> SignBit(D d) {
301 using Unsigned = MakeUnsigned<typename D::T>;
302 const Unsigned bit = Unsigned(1) << (sizeof(Unsigned) * 8 - 1);
303 return BitCast(d, Set(Rebind<Unsigned, D>(), bit));
304}
305
Jan Wassenberg94a72d02020-10-29 18:04:03 +0100306// NOLINTNEXTLINE(google-readability-namespace-comments)
307} // namespace HWY_NAMESPACE
308} // namespace hwy
309HWY_AFTER_NAMESPACE();
310
311#endif // HWY_HIGHWAY_PER_TARGET