Fix MSVC arm build.
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 3b1c6e7..e436360 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -132,12 +132,12 @@
template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
{
- const Packet2ui b = vreinterpret_u32_f32(a.v);
+ const Packet2ui b = Packet2ui(vreinterpret_u32_f32(a.v));
return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
}
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
- const Packet4ui b = vreinterpretq_u32_f32(a.v);
+ const Packet4ui b = Packet4ui(vreinterpretq_u32_f32(a.v));
return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
}
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index 6d4d1c3..e2bc342 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -59,6 +59,16 @@
typedef eigen_packet_wrapper<int64x2_t ,16> Packet2l;
typedef eigen_packet_wrapper<uint64x2_t ,17> Packet2ul;
+EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
+ float from[4] = {a, b, c, d};
+ return vld1q_f32(from);
+}
+
+EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
+ float from[2] = {a, b};
+ return vld1_f32(from);
+}
+
#else
typedef float32x2_t Packet2f;
@@ -80,11 +90,14 @@
typedef int64x2_t Packet2l;
typedef uint64x2_t Packet2ul;
+EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) { return {a, b, c, d}; }
+EIGEN_ALWAYS_INLINE Packet4f make_packet2f(float a, float b) { return {a, b}; }
+
#endif // EIGEN_COMP_MSVC_STRICT
EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){
const float* a = reinterpret_cast<const float*>(&m);
- Packet4f res = {*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3))};
+ Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3)));
return res;
}
@@ -97,7 +110,7 @@
{
const float* a = reinterpret_cast<const float*>(&m);
const float* b = reinterpret_cast<const float*>(&n);
- Packet4f res = {*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(b + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3))};
+ Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(b + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3)));
return res;
}
@@ -106,7 +119,7 @@
{
const float* a = reinterpret_cast<const float*>(&m);
const float* b = reinterpret_cast<const float*>(&n);
- Packet4f res = {*(a + (mask & 3)), *(b + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3))};
+ Packet4f res = make_packet4f(*(a + (mask & 3)), *(b + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3)), *(b + ((mask >> 6) & 3)));
return res;
}
@@ -148,7 +161,7 @@
#define EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
-#if EIGEN_ARCH_ARM64
+#if EIGEN_ARCH_ARM64 && EIGEN_COMP_GNUC
// __builtin_prefetch tends to do nothing on ARM64 compilers because the
// prefetch instructions there are too detailed for __builtin_prefetch to map
// meaningfully to them.
@@ -865,12 +878,12 @@
template<> EIGEN_STRONG_INLINE Packet2f pxor<Packet2f>(const Packet2f& a, const Packet2f& b);
template<> EIGEN_STRONG_INLINE Packet2f paddsub<Packet2f>(const Packet2f& a, const Packet2f & b) {
- Packet2f mask = {numext::bit_cast<float>(0x80000000u), 0.0f};
+ Packet2f mask = make_packet2f(numext::bit_cast<float>(0x80000000u), 0.0f);
return padd(a, pxor(mask, b));
}
template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b);
template<> EIGEN_STRONG_INLINE Packet4f paddsub<Packet4f>(const Packet4f& a, const Packet4f& b) {
- Packet4f mask = {numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f};
+ Packet4f mask = make_packet4f(numext::bit_cast<float>(0x80000000u), 0.0f, numext::bit_cast<float>(0x80000000u), 0.0f);
return padd(a, pxor(mask, b));
}
@@ -2536,7 +2549,7 @@
template<> EIGEN_STRONG_INLINE float predux_mul<Packet2f>(const Packet2f& a)
{ return vget_lane_f32(a, 0) * vget_lane_f32(a, 1); }
template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
-{ return predux_mul(vmul_f32(vget_low_f32(a), vget_high_f32(a))); }
+{ return predux_mul<Packet2f>(vmul_f32(vget_low_f32(a), vget_high_f32(a))); }
template<> EIGEN_STRONG_INLINE int8_t predux_mul<Packet4c>(const Packet4c& a)
{
int8x8_t prod = vreinterpret_s8_s32(vdup_n_s32(a));
@@ -2550,7 +2563,7 @@
return vget_lane_s8(prod, 0) * vget_lane_s8(prod, 4);
}
template<> EIGEN_STRONG_INLINE int8_t predux_mul<Packet16c>(const Packet16c& a)
-{ return predux_mul(vmul_s8(vget_low_s8(a), vget_high_s8(a))); }
+{ return predux_mul<Packet8c>(vmul_s8(vget_low_s8(a), vget_high_s8(a))); }
template<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet4uc>(const Packet4uc& a)
{
uint8x8_t prod = vreinterpret_u8_u32(vdup_n_u32(a));
@@ -2564,7 +2577,7 @@
return vget_lane_u8(prod, 0) * vget_lane_u8(prod, 4);
}
template<> EIGEN_STRONG_INLINE uint8_t predux_mul<Packet16uc>(const Packet16uc& a)
-{ return predux_mul(vmul_u8(vget_low_u8(a), vget_high_u8(a))); }
+{ return predux_mul<Packet8uc>(vmul_u8(vget_low_u8(a), vget_high_u8(a))); }
template<> EIGEN_STRONG_INLINE int16_t predux_mul<Packet4s>(const Packet4s& a)
{
const int16x4_t prod = vmul_s16(a, vrev32_s16(a));
@@ -2600,11 +2613,11 @@
template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet2i>(const Packet2i& a)
{ return vget_lane_s32(a, 0) * vget_lane_s32(a, 1); }
template<> EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a)
-{ return predux_mul(vmul_s32(vget_low_s32(a), vget_high_s32(a))); }
+{ return predux_mul<Packet2i>(vmul_s32(vget_low_s32(a), vget_high_s32(a))); }
template<> EIGEN_STRONG_INLINE uint32_t predux_mul<Packet2ui>(const Packet2ui& a)
{ return vget_lane_u32(a, 0) * vget_lane_u32(a, 1); }
template<> EIGEN_STRONG_INLINE uint32_t predux_mul<Packet4ui>(const Packet4ui& a)
-{ return predux_mul(vmul_u32(vget_low_u32(a), vget_high_u32(a))); }
+{ return predux_mul<Packet2ui>(vmul_u32(vget_low_u32(a), vget_high_u32(a))); }
template<> EIGEN_STRONG_INLINE int64_t predux_mul<Packet2l>(const Packet2l& a)
{ return vgetq_lane_s64(a, 0) * vgetq_lane_s64(a, 1); }
template<> EIGEN_STRONG_INLINE uint64_t predux_mul<Packet2ul>(const Packet2ul& a)
@@ -3457,7 +3470,7 @@
{
// See the scalar implementation in BFloat16.h for a comprehensible explanation
// of this fast rounding algorithm
- Packet4ui input = reinterpret_cast<Packet4ui>(p);
+ Packet4ui input = Packet4ui(vreinterpretq_u32_f32(p));
// lsb = (input >> 16) & 1
Packet4ui lsb = vandq_u32(vshrq_n_u32(input, 16), vdupq_n_u32(1));
@@ -3482,7 +3495,7 @@
EIGEN_STRONG_INLINE Packet4f Bf16ToF32(const Packet4bf& p)
{
- return reinterpret_cast<Packet4f>(vshlq_n_u32(vmovl_u16(p), 16));
+ return Packet4f(vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(p), 16)));
}
EIGEN_STRONG_INLINE Packet4bf F32MaskToBf16Mask(const Packet4f& p) {
@@ -3490,21 +3503,21 @@
}
template<> EIGEN_STRONG_INLINE Packet4bf pset1<Packet4bf>(const bfloat16& from) {
- return pset1<Packet4us>(from.value);
+ return Packet4bf(pset1<Packet4us>(from.value));
}
template<> EIGEN_STRONG_INLINE bfloat16 pfirst<Packet4bf>(const Packet4bf& from) {
- return bfloat16_impl::raw_uint16_to_bfloat16(static_cast<uint16_t>(pfirst<Packet4us>(from)));
+ return bfloat16_impl::raw_uint16_to_bfloat16(static_cast<uint16_t>(pfirst<Packet4us>(Packet4us(from))));
}
template<> EIGEN_STRONG_INLINE Packet4bf pload<Packet4bf>(const bfloat16* from)
{
- return pload<Packet4us>(reinterpret_cast<const uint16_t*>(from));
+ return Packet4bf(pload<Packet4us>(reinterpret_cast<const uint16_t*>(from)));
}
template<> EIGEN_STRONG_INLINE Packet4bf ploadu<Packet4bf>(const bfloat16* from)
{
- return ploadu<Packet4us>(reinterpret_cast<const uint16_t*>(from));
+ return Packet4bf(ploadu<Packet4us>(reinterpret_cast<const uint16_t*>(from)));
}
template<> EIGEN_STRONG_INLINE void pstore<bfloat16>(bfloat16* to, const Packet4bf& from)
@@ -3519,7 +3532,7 @@
template<> EIGEN_STRONG_INLINE Packet4bf ploaddup<Packet4bf>(const bfloat16* from)
{
- return ploaddup<Packet4us>(reinterpret_cast<const uint16_t*>(from));
+ return Packet4bf(ploaddup<Packet4us>(reinterpret_cast<const uint16_t*>(from)));
}
template <> EIGEN_STRONG_INLINE Packet4bf pabs(const Packet4bf& a) {
@@ -3566,25 +3579,25 @@
}
template<> EIGEN_STRONG_INLINE Packet4bf por(const Packet4bf& a,const Packet4bf& b) {
- return por<Packet4us>(a, b);
+ return Packet4bf(por<Packet4us>(Packet4us(a), Packet4us(b)));
}
template<> EIGEN_STRONG_INLINE Packet4bf pxor(const Packet4bf& a,const Packet4bf& b) {
- return pxor<Packet4us>(a, b);
+ return Packet4bf(pxor<Packet4us>(Packet4us(a), Packet4us(b)));
}
template<> EIGEN_STRONG_INLINE Packet4bf pand(const Packet4bf& a,const Packet4bf& b) {
- return pand<Packet4us>(a, b);
+ return Packet4bf(pand<Packet4us>(Packet4us(a), Packet4us(b)));
}
template<> EIGEN_STRONG_INLINE Packet4bf pandnot(const Packet4bf& a,const Packet4bf& b) {
- return pandnot<Packet4us>(a, b);
+ return Packet4bf(pandnot<Packet4us>(Packet4us(a), Packet4us(b)));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4bf pselect(const Packet4bf& mask, const Packet4bf& a,
const Packet4bf& b)
{
- return pselect<Packet4us>(mask, a, b);
+ return Packet4bf(pselect<Packet4us>(Packet4us(mask), Packet4us(a), Packet4us(b)));
}
template<> EIGEN_STRONG_INLINE Packet4bf print<Packet4bf>(const Packet4bf& a)
@@ -3623,13 +3636,13 @@
template<>
EIGEN_STRONG_INLINE Packet4bf pgather<bfloat16, Packet4bf>(const bfloat16* from, Index stride)
{
- return pgather<uint16_t, Packet4us>(reinterpret_cast<const uint16_t*>(from), stride);
+ return Packet4bf(pgather<uint16_t, Packet4us>(reinterpret_cast<const uint16_t*>(from), stride));
}
template<>
EIGEN_STRONG_INLINE void pscatter<bfloat16, Packet4bf>(bfloat16* to, const Packet4bf& from, Index stride)
{
- pscatter<uint16_t, Packet4us>(reinterpret_cast<uint16_t*>(to), from, stride);
+ pscatter<uint16_t, Packet4us>(reinterpret_cast<uint16_t*>(to), Packet4us(from), stride);
}
template<> EIGEN_STRONG_INLINE bfloat16 predux<Packet4bf>(const Packet4bf& a)
@@ -3654,7 +3667,7 @@
template<> EIGEN_STRONG_INLINE Packet4bf preverse<Packet4bf>(const Packet4bf& a)
{
- return preverse<Packet4us>(a);
+ return Packet4bf(preverse<Packet4us>(Packet4us(a)));
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet4bf, 4>& kernel)
@@ -3689,7 +3702,7 @@
template<> EIGEN_STRONG_INLINE Packet4bf pnegate<Packet4bf>(const Packet4bf& a)
{
- return pxor<Packet4us>(a, pset1<Packet4us>(static_cast<uint16_t>(0x8000)));
+ return Packet4bf(pxor<Packet4us>(Packet4us(a), pset1<Packet4us>(static_cast<uint16_t>(0x8000))));
}
//---------- double ----------
@@ -3707,17 +3720,34 @@
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
+#if EIGEN_COMP_GNUC
// Bug 907: workaround missing declarations of the following two functions in the ADK
// Defining these functions as templates ensures that if these intrinsics are
// already defined in arm_neon.h, then our workaround doesn't cause a conflict
// and has lower priority in overload resolution.
+// This doesn't work with MSVC though, since the function names are macros.
template <typename T> uint64x2_t vreinterpretq_u64_f64(T a) { return (uint64x2_t) a; }
template <typename T> float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2_t) a; }
+#endif
+#if EIGEN_COMP_MSVC_STRICT
+typedef eigen_packet_wrapper<float64x2_t, 18> Packet2d;
+typedef eigen_packet_wrapper<float64x1_t, 19> Packet1d;
+
+EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) {
+ double from[2] = {a, b};
+ return vld1q_f64(from);
+}
+
+#else
typedef float64x2_t Packet2d;
typedef float64x1_t Packet1d;
+EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) { return {a, b}; }
+#endif
+
+
// fuctionally equivalent to _mm_shuffle_pd in SSE (i.e. shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask))
// Currently used in LU/arch/InverseSize4.h to enable a shared implementation
// for fast inversion of matrices of size 4.
@@ -3725,7 +3755,7 @@
{
const double* a = reinterpret_cast<const double*>(&m);
const double* b = reinterpret_cast<const double*>(&n);
- Packet2d res = {*(a + (mask & 1)), *(b + ((mask >> 1) & 1))};
+ Packet2d res = make_packet2d(*(a + (mask & 1)), *(b + ((mask >> 1) & 1)));
return res;
}
@@ -3819,7 +3849,7 @@
template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& , const Packet2d& );
template<> EIGEN_STRONG_INLINE Packet2d paddsub<Packet2d>(const Packet2d& a, const Packet2d& b){
- const Packet2d mask = {numext::bit_cast<double>(0x8000000000000000ull),0.0};
+ const Packet2d mask = make_packet2d(numext::bit_cast<double>(0x8000000000000000ull), 0.0);
return padd(a, pxor(mask, b));
}
@@ -3933,7 +3963,7 @@
{ return (vget_low_f64(a) * vget_high_f64(a))[0]; }
#else
template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
-{ return vget_lane_f64(vget_low_f64(a) * vget_high_f64(a), 0); }
+{ return vget_lane_f64(vmul_f64(vget_low_f64(a), vget_high_f64(a)), 0); }
#endif
// min
diff --git a/Eigen/src/Core/arch/NEON/TypeCasting.h b/Eigen/src/Core/arch/NEON/TypeCasting.h
index e5ddab6..59368f8 100644
--- a/Eigen/src/Core/arch/NEON/TypeCasting.h
+++ b/Eigen/src/Core/arch/NEON/TypeCasting.h
@@ -18,6 +18,113 @@
namespace internal {
//==============================================================================
+// preinterpret
+//==============================================================================
+template <>
+EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {
+ return Packet2f(vreinterpret_f32_s32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {
+ return Packet2f(vreinterpret_f32_u32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
+ return Packet4f(vreinterpretq_f32_s32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
+ return Packet4f(vreinterpretq_f32_u32(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
+ return static_cast<Packet4c>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
+ return Packet8c(preinterpret<Packet8c>(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
+ return Packet16c(vreinterpretq_s8_u8(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
+ return static_cast<Packet4uc>(a);
+}
+template <>
+EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {
+ return Packet8uc(vreinterpret_u8_s8(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
+ return Packet16uc(vreinterpretq_u8_s8(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {
+ return Packet4s(vreinterpret_s16_u16(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
+ return Packet8s(vreinterpretq_s16_u16(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
+ return Packet4us(vreinterpret_u16_s16(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
+ return Packet8us(vreinterpretq_u16_s16(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {
+ return Packet2i(vreinterpret_s32_f32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {
+ return Packet2i(vreinterpret_s32_u32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
+ return Packet4i(vreinterpretq_s32_f32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
+ return Packet4i(vreinterpretq_s32_u32(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {
+ return Packet2ui(vreinterpret_u32_f32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {
+ return Packet2ui(vreinterpret_u32_s32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
+ return Packet4ui(vreinterpretq_u32_f32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
+ return Packet4ui(vreinterpretq_u32_s32(a));
+}
+
+template <>
+EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {
+ return Packet2l(vreinterpretq_s64_u64(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
+ return Packet2ul(vreinterpretq_u64_s64(a));
+}
+
+//==============================================================================
// pcast, SrcType = float
//==============================================================================
template <>
@@ -190,7 +297,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet16c, Packet2ul>(const Packet16c& a) {
- return vreinterpretq_u64_s64(pcast<Packet16c, Packet2l>(a));
+ return preinterpret<Packet2ul>(pcast<Packet16c, Packet2l>(a));
}
template <>
@@ -214,11 +321,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4ui pcast<Packet16c, Packet4ui>(const Packet16c& a) {
- return vreinterpretq_u32_s32(pcast<Packet16c, Packet4i>(a));
+ return preinterpret<Packet4ui>(pcast<Packet16c, Packet4i>(a));
}
template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet8c, Packet2ui>(const Packet8c& a) {
- return vreinterpret_u32_s32(pcast<Packet8c, Packet2i>(a));
+ return preinterpret<Packet2ui>(pcast<Packet8c, Packet2i>(a));
}
template <>
@@ -242,11 +349,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet8us pcast<Packet16c, Packet8us>(const Packet16c& a) {
- return vreinterpretq_u16_s16(pcast<Packet16c, Packet8s>(a));
+ return preinterpret<Packet8us>(pcast<Packet16c, Packet8s>(a));
}
template <>
EIGEN_STRONG_INLINE Packet4us pcast<Packet8c, Packet4us>(const Packet8c& a) {
- return vreinterpret_u16_s16(pcast<Packet8c, Packet4s>(a));
+ return preinterpret<Packet4us>(pcast<Packet8c, Packet4s>(a));
}
template <>
@@ -272,11 +379,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet16uc pcast<Packet16c, Packet16uc>(const Packet16c& a) {
- return vreinterpretq_u8_s8(a);
+ return preinterpret<Packet16uc>(a);
}
template <>
EIGEN_STRONG_INLINE Packet8uc pcast<Packet8c, Packet8uc>(const Packet8c& a) {
- return vreinterpret_u8_s8(a);
+ return preinterpret<Packet8uc>(a);
}
template <>
EIGEN_STRONG_INLINE Packet4uc pcast<Packet4c, Packet4uc>(const Packet4c& a) {
@@ -317,7 +424,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2l pcast<Packet16uc, Packet2l>(const Packet16uc& a) {
- return vreinterpretq_s64_u64(pcast<Packet16uc, Packet2ul>(a));
+ return preinterpret<Packet2l>(pcast<Packet16uc, Packet2ul>(a));
}
template <>
@@ -341,11 +448,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4i pcast<Packet16uc, Packet4i>(const Packet16uc& a) {
- return vreinterpretq_s32_u32(pcast<Packet16uc, Packet4ui>(a));
+ return preinterpret<Packet4i>(pcast<Packet16uc, Packet4ui>(a));
}
template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet8uc, Packet2i>(const Packet8uc& a) {
- return vreinterpret_s32_u32(pcast<Packet8uc, Packet2ui>(a));
+ return preinterpret<Packet2i>(pcast<Packet8uc, Packet2ui>(a));
}
template <>
@@ -369,11 +476,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet8s pcast<Packet16uc, Packet8s>(const Packet16uc& a) {
- return vreinterpretq_s16_u16(pcast<Packet16uc, Packet8us>(a));
+ return preinterpret<Packet8s>(pcast<Packet16uc, Packet8us>(a));
}
template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet8uc, Packet4s>(const Packet8uc& a) {
- return vreinterpret_s16_u16(pcast<Packet8uc, Packet4us>(a));
+ return preinterpret<Packet4s>(pcast<Packet8uc, Packet4us>(a));
}
template <>
@@ -399,11 +506,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet16c pcast<Packet16uc, Packet16c>(const Packet16uc& a) {
- return vreinterpretq_s8_u8(a);
+ return preinterpret<Packet16c>(a);
}
template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet8uc, Packet8c>(const Packet8uc& a) {
- return vreinterpret_s8_u8(a);
+ return preinterpret<Packet8c>(a);
}
template <>
EIGEN_STRONG_INLINE Packet4c pcast<Packet4uc, Packet4c>(const Packet4uc& a) {
@@ -444,7 +551,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet8s, Packet2ul>(const Packet8s& a) {
- return vreinterpretq_u64_s64(pcast<Packet8s, Packet2l>(a));
+ return preinterpret<Packet2ul>(pcast<Packet8s, Packet2l>(a));
}
template <>
@@ -468,11 +575,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4ui pcast<Packet8s, Packet4ui>(const Packet8s& a) {
- return vreinterpretq_u32_s32(pcast<Packet8s, Packet4i>(a));
+ return preinterpret<Packet4ui>(pcast<Packet8s, Packet4i>(a));
}
template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet4s, Packet2ui>(const Packet4s& a) {
- return vreinterpret_u32_s32(pcast<Packet4s, Packet2i>(a));
+ return preinterpret<Packet2ui>(pcast<Packet4s, Packet2i>(a));
}
template <>
@@ -494,11 +601,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet8us pcast<Packet8s, Packet8us>(const Packet8s& a) {
- return vreinterpretq_u16_s16(a);
+ return preinterpret<Packet8us>(a);
}
template <>
EIGEN_STRONG_INLINE Packet4us pcast<Packet4s, Packet4us>(const Packet4s& a) {
- return vreinterpret_u16_s16(a);
+ return preinterpret<Packet4us>(a);
}
template <>
@@ -561,7 +668,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2l pcast<Packet8us, Packet2l>(const Packet8us& a) {
- return vreinterpretq_s64_u64(pcast<Packet8us, Packet2ul>(a));
+ return preinterpret<Packet2l>(pcast<Packet8us, Packet2ul>(a));
}
template <>
@@ -585,11 +692,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4i pcast<Packet8us, Packet4i>(const Packet8us& a) {
- return vreinterpretq_s32_u32(pcast<Packet8us, Packet4ui>(a));
+ return preinterpret<Packet4i>(pcast<Packet8us, Packet4ui>(a));
}
template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet4us, Packet2i>(const Packet4us& a) {
- return vreinterpret_s32_u32(pcast<Packet4us, Packet2ui>(a));
+ return preinterpret<Packet2i>(pcast<Packet4us, Packet2ui>(a));
}
template <>
@@ -611,11 +718,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet8s pcast<Packet8us, Packet8s>(const Packet8us& a) {
- return vreinterpretq_s16_u16(a);
+ return preinterpret<Packet8s>(a);
}
template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet4us, Packet4s>(const Packet4us& a) {
- return vreinterpret_s16_u16(a);
+ return preinterpret<Packet4s>(a);
}
template <>
@@ -637,11 +744,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet16c pcast<Packet8us, Packet16c>(const Packet8us& a, const Packet8us& b) {
- return vreinterpretq_s8_u8(pcast<Packet8us, Packet16uc>(a, b));
+ return preinterpret<Packet16c>(pcast<Packet8us, Packet16uc>(a, b));
}
template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet4us, Packet8c>(const Packet4us& a, const Packet4us& b) {
- return vreinterpret_s8_u8(pcast<Packet4us, Packet8uc>(a, b));
+ return preinterpret<Packet8c>(pcast<Packet4us, Packet8uc>(a, b));
}
//==============================================================================
@@ -676,7 +783,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet4i, Packet2ul>(const Packet4i& a) {
- return vreinterpretq_u64_s64(pcast<Packet4i, Packet2l>(a));
+ return preinterpret<Packet2ul>(pcast<Packet4i, Packet2l>(a));
}
template <>
@@ -698,11 +805,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4ui pcast<Packet4i, Packet4ui>(const Packet4i& a) {
- return vreinterpretq_u32_s32(a);
+ return preinterpret<Packet4ui>(a);
}
template <>
EIGEN_STRONG_INLINE Packet2ui pcast<Packet2i, Packet2ui>(const Packet2i& a) {
- return vreinterpret_u32_s32(a);
+ return preinterpret<Packet2ui>(a);
}
template <>
@@ -801,7 +908,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2l pcast<Packet4ui, Packet2l>(const Packet4ui& a) {
- return vreinterpretq_s64_u64(pcast<Packet4ui, Packet2ul>(a));
+ return preinterpret<Packet2l>(pcast<Packet4ui, Packet2ul>(a));
}
template <>
@@ -823,11 +930,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet4i pcast<Packet4ui, Packet4i>(const Packet4ui& a) {
- return vreinterpretq_s32_u32(a);
+ return preinterpret<Packet4i>(a);
}
template <>
EIGEN_STRONG_INLINE Packet2i pcast<Packet2ui, Packet2i>(const Packet2ui& a) {
- return vreinterpret_s32_u32(a);
+ return preinterpret<Packet2i>(a);
}
template <>
@@ -849,11 +956,11 @@
};
template <>
EIGEN_STRONG_INLINE Packet8s pcast<Packet4ui, Packet8s>(const Packet4ui& a, const Packet4ui& b) {
- return vreinterpretq_s16_u16(pcast<Packet4ui, Packet8us>(a, b));
+ return preinterpret<Packet8s>(pcast<Packet4ui, Packet8us>(a, b));
}
template <>
EIGEN_STRONG_INLINE Packet4s pcast<Packet2ui, Packet4s>(const Packet2ui& a, const Packet2ui& b) {
- return vreinterpret_s16_u16(pcast<Packet2ui, Packet4us>(a, b));
+ return preinterpret<Packet4s>(pcast<Packet2ui, Packet4us>(a, b));
}
template <>
@@ -882,12 +989,12 @@
template <>
EIGEN_STRONG_INLINE Packet16c pcast<Packet4ui, Packet16c>(const Packet4ui& a, const Packet4ui& b, const Packet4ui& c,
const Packet4ui& d) {
- return vreinterpretq_s8_u8(pcast<Packet4ui, Packet16uc>(a, b, c, d));
+ return preinterpret<Packet16c>(pcast<Packet4ui, Packet16uc>(a, b, c, d));
}
template <>
EIGEN_STRONG_INLINE Packet8c pcast<Packet2ui, Packet8c>(const Packet2ui& a, const Packet2ui& b, const Packet2ui& c,
const Packet2ui& d) {
- return vreinterpret_s8_u8(pcast<Packet2ui, Packet8uc>(a, b, c, d));
+ return preinterpret<Packet8c>(pcast<Packet2ui, Packet8uc>(a, b, c, d));
}
//==============================================================================
@@ -917,7 +1024,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2ul pcast<Packet2l, Packet2ul>(const Packet2l& a) {
- return vreinterpretq_u64_s64(a);
+ return preinterpret<Packet2ul>(a);
}
template <>
@@ -1015,7 +1122,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet2l pcast<Packet2ul, Packet2l>(const Packet2ul& a) {
- return vreinterpretq_s64_u64(a);
+ return preinterpret<Packet2l>(a);
}
template <>
@@ -1033,7 +1140,7 @@
};
template <>
EIGEN_STRONG_INLINE Packet4i pcast<Packet2ul, Packet4i>(const Packet2ul& a, const Packet2ul& b) {
- return vreinterpretq_s32_u32(pcast<Packet2ul, Packet4ui>(a, b));
+ return preinterpret<Packet4i>(pcast<Packet2ul, Packet4ui>(a, b));
}
template <>
@@ -1055,7 +1162,7 @@
template <>
EIGEN_STRONG_INLINE Packet8s pcast<Packet2ul, Packet8s>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
const Packet2ul& d) {
- return vreinterpretq_s16_u16(pcast<Packet2ul, Packet8us>(a, b, c, d));
+ return preinterpret<Packet8s>(pcast<Packet2ul, Packet8us>(a, b, c, d));
}
template <>
@@ -1079,114 +1186,7 @@
EIGEN_STRONG_INLINE Packet16c pcast<Packet2ul, Packet16c>(const Packet2ul& a, const Packet2ul& b, const Packet2ul& c,
const Packet2ul& d, const Packet2ul& e, const Packet2ul& f,
const Packet2ul& g, const Packet2ul& h) {
- return vreinterpretq_s8_u8(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
-}
-
-//==============================================================================
-// preinterpret
-//==============================================================================
-template <>
-EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2i>(const Packet2i& a) {
- return vreinterpret_f32_s32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2f preinterpret<Packet2f, Packet2ui>(const Packet2ui& a) {
- return vreinterpret_f32_u32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4i>(const Packet4i& a) {
- return vreinterpretq_f32_s32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet4ui>(const Packet4ui& a) {
- return vreinterpretq_f32_u32(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc& a) {
- return static_cast<Packet4c>(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
- return vreinterpret_s8_u8(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
- return vreinterpretq_s8_u8(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4uc preinterpret<Packet4uc, Packet4c>(const Packet4c& a) {
- return static_cast<Packet4uc>(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet8uc preinterpret<Packet8uc, Packet8c>(const Packet8c& a) {
- return vreinterpret_u8_s8(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet16uc preinterpret<Packet16uc, Packet16c>(const Packet16c& a) {
- return vreinterpretq_u8_s8(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4s preinterpret<Packet4s, Packet4us>(const Packet4us& a) {
- return vreinterpret_s16_u16(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet8s preinterpret<Packet8s, Packet8us>(const Packet8us& a) {
- return vreinterpretq_s16_u16(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet4us preinterpret<Packet4us, Packet4s>(const Packet4s& a) {
- return vreinterpret_u16_s16(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet8us preinterpret<Packet8us, Packet8s>(const Packet8s& a) {
- return vreinterpretq_u16_s16(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2f>(const Packet2f& a) {
- return vreinterpret_s32_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2i preinterpret<Packet2i, Packet2ui>(const Packet2ui& a) {
- return vreinterpret_s32_u32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4f>(const Packet4f& a) {
- return vreinterpretq_s32_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
- return vreinterpretq_s32_u32(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2f>(const Packet2f& a) {
- return vreinterpret_u32_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2ui preinterpret<Packet2ui, Packet2i>(const Packet2i& a) {
- return vreinterpret_u32_s32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4f>(const Packet4f& a) {
- return vreinterpretq_u32_f32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
- return vreinterpretq_u32_s32(a);
-}
-
-template <>
-EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2ul>(const Packet2ul& a) {
- return vreinterpretq_s64_u64(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2l>(const Packet2l& a) {
- return vreinterpretq_u64_s64(a);
+ return preinterpret<Packet16c>(pcast<Packet2ul, Packet16uc>(a, b, c, d, e, f, g, h));
}
#if EIGEN_ARCH_ARM64
@@ -1196,6 +1196,31 @@
//==============================================================================
template <>
+EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
+ return Packet2d(vreinterpretq_f64_s64(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
+ return Packet2d(vreinterpretq_f64_u64(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
+ return Packet2l(vreinterpretq_s64_f64(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
+ return Packet2ul(vreinterpretq_u64_f64(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
+ return Packet2d(vreinterpretq_f64_s32(a));
+}
+template <>
+EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
+ return Packet4i(vreinterpretq_s32_f64(a));
+}
+
+template <>
struct type_casting_traits<double, double> {
enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
@@ -1316,7 +1341,9 @@
template <>
EIGEN_STRONG_INLINE Packet2d pcast<Packet16c, Packet2d>(const Packet16c& a) {
// Discard all but first two values.
- return vcvt_f64_f32(pcast<Packet8c, Packet2f>(vget_low_s8(a)));
+ // MSVC defines most intrinsics as macros, so we need to do this in two lines for portability.
+ Packet2f tmp = pcast<Packet8c, Packet2f>(vget_low_s8(a));
+ return vcvt_f64_f32(tmp);
}
template <>
@@ -1326,7 +1353,8 @@
template <>
EIGEN_STRONG_INLINE Packet2d pcast<Packet16uc, Packet2d>(const Packet16uc& a) {
// Discard all but first two values.
- return vcvt_f64_f32(pcast<Packet8uc, Packet2f>(vget_low_u8(a)));
+ Packet2f tmp = pcast<Packet8uc, Packet2f>(vget_low_u8(a));
+ return vcvt_f64_f32(tmp);
}
template <>
@@ -1336,7 +1364,8 @@
template <>
EIGEN_STRONG_INLINE Packet2d pcast<Packet8s, Packet2d>(const Packet8s& a) {
// Discard all but first two values.
- return vcvt_f64_f32(pcast<Packet4s, Packet2f>(vget_low_s16(a)));
+ Packet2f tmp = pcast<Packet4s, Packet2f>(vget_low_s16(a));
+ return vcvt_f64_f32(tmp);
}
template <>
@@ -1346,7 +1375,8 @@
template <>
EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
// Discard all but first two values.
- return vcvt_f64_f32(pcast<Packet4us, Packet2f>(vget_low_u16(a)));
+ Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_s16(a));
+ return vcvt_f64_f32(tmp);
}
template <>
@@ -1387,31 +1417,6 @@
return vcvtq_f64_u64(a);
}
-template <>
-EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2l>(const Packet2l& a) {
- return vreinterpretq_f64_s64(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet2ul>(const Packet2ul& a) {
- return vreinterpretq_f64_u64(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l, Packet2d>(const Packet2d& a) {
- return vreinterpretq_s64_f64(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2ul preinterpret<Packet2ul, Packet2d>(const Packet2d& a) {
- return vreinterpretq_u64_f64(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4i>(const Packet4i& a) {
- return vreinterpretq_f64_s32(a);
-}
-template <>
-EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet2d>(const Packet2d& a) {
- return vreinterpretq_s32_f64(a);
-}
-
#endif // EIGEN_ARCH_ARM64
} // end namespace internal