Support AVX2/FMA intrinsics in Audio Resampler module

From the test result, using AVX2/FMA is 1.60x faster than SSE on atlas.

Bug: webrtc:11663
Test: common_audio_unittests on atlas and octopus.
Change-Id: Ibd45ea46aa97d5790a24e5116f741592b95f6416
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176382
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31810}
diff --git a/system_wrappers/source/cpu_features.cc b/system_wrappers/source/cpu_features.cc
index ebcb48c..1667e46 100644
--- a/system_wrappers/source/cpu_features.cc
+++ b/system_wrappers/source/cpu_features.cc
@@ -24,6 +24,20 @@
 }
 
 #if defined(WEBRTC_ARCH_X86_FAMILY)
+
+// xgetbv returns the 64-bit value of the Intel Extended Control Register (XCR)
+// selected by |xcr|. Only XCR0 is currently defined, so |xcr| should be zero.
+uint64_t xgetbv(uint32_t xcr) {
+#if defined(_MSC_VER)
+  return _xgetbv(xcr);  // Compiler-provided intrinsic on MSVC.
+#else
+  uint32_t eax, edx;  // Low and high 32-bit halves of the result.
+  // ECX carries the register index; the value comes back split in EDX:EAX.
+  __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+  return (static_cast<uint64_t>(edx) << 32) | eax;  // Reassemble EDX:EAX.
+#endif  // _MSC_VER
+}
+
 #ifndef _MSC_VER
 // Intrinsic for "cpuid".
 #if defined(__pic__) && defined(__i386__)
@@ -41,7 +55,7 @@
   __asm__ volatile("cpuid\n"
                    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]),
                      "=d"(cpu_info[3])
-                   : "a"(info_type));
+                   : "a"(info_type), "c"(0));
 }
 #endif
 #endif  // _MSC_VER
@@ -51,6 +65,8 @@
 // Actual feature detection for x86.
 static int GetCPUInfo(CPUFeature feature) {
   int cpu_info[4];
+  __cpuid(cpu_info, 0);
+  int num_ids = cpu_info[0];
   __cpuid(cpu_info, 1);
   if (feature == kSSE2) {
     return 0 != (cpu_info[3] & 0x04000000);
@@ -58,6 +74,23 @@
   if (feature == kSSE3) {
     return 0 != (cpu_info[2] & 0x00000001);
   }
+  if (feature == kAVX2) {
+    // Interpret CPU feature information.
+    int cpu_info7[4] = {-1};
+    if (num_ids >= 7) {
+      __cpuid(cpu_info7, 7);
+    }
+
+#if defined(WEBRTC_ENABLE_AVX2)
+    return (cpu_info[2] & 0x10000000) != 0 &&
+           (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
+           (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
+           (xgetbv(0) & 0x00000006) == 6 /* XSAVE enabled by kernel */ &&
+           (cpu_info7[1] & 0x00000020) != 0;
+#else
+    return 0;
+#endif  // WEBRTC_ENABLE_AVX2
+  }
   return 0;
 }
 #else