Reland "Support AVX2/FMA intrinsics in Audio Resampler module"
This is a reland of 1ca8d87239f1209031bbc77a6443bc7ac2dcee8c
Original change's description:
> Support AVX2/FMA intrinsics in Audio Resampler module
>
> From the test result, using AVX2/FMA is 1.60x faster than SSE on atlas.
>
> Bug: webrtc:11663
> Test: common_audio_unittests on atlas and octopus.
> Change-Id: Ibd45ea46aa97d5790a24e5116f741592b95f6416
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176382
> Reviewed-by: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Commit-Queue: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31810}
Bug: webrtc:11663
Change-Id: I92f5832a42c0314853c9fead46425c08e2040dc0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/181800
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31945}
diff --git a/system_wrappers/source/cpu_features.cc b/system_wrappers/source/cpu_features.cc
index ebcb48c..40110ed 100644
--- a/system_wrappers/source/cpu_features.cc
+++ b/system_wrappers/source/cpu_features.cc
@@ -24,6 +24,22 @@
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
+
+#if defined(WEBRTC_ENABLE_AVX2)
+// xgetbv returns the value of an Intel Extended Control Register (XCR).
+// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
+static uint64_t xgetbv(uint32_t xcr) {
+#if defined(_MSC_VER)
+ return _xgetbv(xcr);
+#else
+ uint32_t eax, edx;
+
+ __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (static_cast<uint64_t>(edx) << 32) | eax;
+#endif // _MSC_VER
+}
+#endif // WEBRTC_ENABLE_AVX2
+
#ifndef _MSC_VER
// Intrinsic for "cpuid".
#if defined(__pic__) && defined(__i386__)
@@ -41,7 +57,7 @@
__asm__ volatile("cpuid\n"
: "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
"=d"(cpu_info[3])
- : "a"(info_type));
+ : "a"(info_type), "c"(0));
}
#endif
#endif // _MSC_VER
@@ -58,6 +74,30 @@
if (feature == kSSE3) {
return 0 != (cpu_info[2] & 0x00000001);
}
+#if defined(WEBRTC_ENABLE_AVX2)
+ if (feature == kAVX2) {
+ int cpu_info7[4];
+ __cpuid(cpu_info7, 0);
+ int num_ids = cpu_info7[0];
+ if (num_ids < 7) {
+ return 0;
+ }
+ // Interpret CPU feature information.
+ __cpuid(cpu_info7, 7);
+
+ // AVX instructions can be used when
+ // a) AVX are supported by the CPU,
+ // b) XSAVE is supported by the CPU,
+ // c) XSAVE is enabled by the kernel.
+ // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+ // AVX2 support needs (avx_support && (cpu_info7[1] & 0x00000020) != 0;).
+ return (cpu_info[2] & 0x10000000) != 0 &&
+ (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
+ (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
+ (xgetbv(0) & 0x00000006) == 6 /* XSAVE enabled by kernel */ &&
+ (cpu_info7[1] & 0x00000020) != 0;
+ }
+#endif // WEBRTC_ENABLE_AVX2
return 0;
}
#else