cborinternal_p.h: Fix AVX2 build with MSVC
MSVC (and, I think, ICC too) lacks the simpler scalar intrinsics
(_cvtss_sh and _cvtsh_ss) that convert from single-precision to
half-precision and back. Instead, we need to use the packed-data intrinsics.
Fixes #192.
Signed-off-by: Thiago Macieira <thiago.macieira@intel.com>
diff --git a/.appveyor.yml b/.appveyor.yml
index 453fa12..eb70441 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -13,12 +13,12 @@
if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" (call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.12\msvc2017_64)
- if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2019" (call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.15\msvc2019_64)
+ if /i "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2019" (call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvarsall.bat" x64) & (set QTDIR=C:\Qt\5.15\msvc2019_64) & set CFLAGS=/arch:AVX2
set path=%PATH%;%QTDIR%\bin
build_script:
- cmd: >-
- nmake -f Makefile.nmake -nologo CFLAGS="-W3 -Os -MDd"
+ nmake -f Makefile.nmake -nologo CFLAGS="%CFLAGS% -W3 -Os -MDd"
cd tests
diff --git a/src/cborinternal_p.h b/src/cborinternal_p.h
index a85a929..35b7a79 100644
--- a/src/cborinternal_p.h
+++ b/src/cborinternal_p.h
@@ -37,15 +37,17 @@
#endif
#ifndef CBOR_NO_HALF_FLOAT_TYPE
-# ifdef __F16C__
+# if defined(__F16C__) || defined(__AVX2__)
# include <immintrin.h>
-static inline unsigned short encode_half(double val)
+static inline unsigned short encode_half(float val)
{
- return _cvtss_sh((float)val, 3);
+ __m128i m = _mm_cvtps_ph(_mm_set_ss(val), _MM_FROUND_CUR_DIRECTION);
+ return _mm_extract_epi16(m, 0);
}
-static inline double decode_half(unsigned short half)
+static inline float decode_half(unsigned short half)
{
- return _cvtsh_ss(half);
+ __m128i m = _mm_cvtsi32_si128(half);
+ return _mm_cvtss_f32(_mm_cvtph_ps(m));
}
# else
/* software implementation of float-to-fp16 conversions */