Support AVX2/FMA intrinsics in Audio Resampler module
From the test result, using AVX2/FMA is 1.60x faster than SSE on atlas.
Bug: webrtc:11663
Test: common_audio_unittests on atlas and octopus.
Change-Id: Ibd45ea46aa97d5790a24e5116f741592b95f6416
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176382
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31810}
diff --git a/system_wrappers/source/cpu_features.cc b/system_wrappers/source/cpu_features.cc
index ebcb48c..1667e46 100644
--- a/system_wrappers/source/cpu_features.cc
+++ b/system_wrappers/source/cpu_features.cc
@@ -24,6 +24,20 @@
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+// xgetbv returns the 64-bit value of the Intel Extended Control Register (XCR)
+// selected by |xcr|. Currently only XCR0 is defined, so |xcr| should be zero.
+uint64_t xgetbv(uint32_t xcr) {
+#if defined(_MSC_VER)
+ return _xgetbv(xcr);
+#else
+ uint32_t eax, edx;
+
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (static_cast<uint64_t>(edx) << 32) | eax;
+#endif // _MSC_VER
+}
+
#ifndef _MSC_VER
// Intrinsic for "cpuid".
#if defined(__pic__) && defined(__i386__)
@@ -41,7 +55,7 @@
__asm__ volatile("cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
- : "a"(info_type));
+ : "a"(info_type), "c"(0));
}
#endif
#endif // _MSC_VER
@@ -51,6 +65,8 @@
// Actual feature detection for x86.
static int GetCPUInfo(CPUFeature feature) {
int cpu_info[4];
+ __cpuid(cpu_info, 0);
+ int num_ids = cpu_info[0];
__cpuid(cpu_info, 1);
if (feature == kSSE2) {
return 0 != (cpu_info[3] & 0x04000000);
@@ -58,6 +74,23 @@
if (feature == kSSE3) {
return 0 != (cpu_info[2] & 0x00000001);
}
+ if (feature == kAVX2) {
+ // Interpret CPU feature information.
+ int cpu_info7[4] = {-1};
+ if (num_ids >= 7) {
+ __cpuid(cpu_info7, 7);
+ }
+
+#if defined(WEBRTC_ENABLE_AVX2)
+ return (cpu_info[2] & 0x10000000) != 0 &&
+ (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
+ (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
+ (xgetbv(0) & 0x00000006) == 6 /* XSAVE enabled by kernel */ &&
+ (cpu_info7[1] & 0x00000020) != 0;
+#else
+ return 0;
+#endif // WEBRTC_ENABLE_AVX2
+ }
return 0;
}
#else