APM: add HW-only denormal disabler

Denormal numbers (see [1]) may origin in APM when the input is zeroed
after a non-zero signal. In extreme cases, instructions involving
denormal operands may run as much as 100 times slower, which seems to
be the case (to some extent) of crbug.com/1227566.

This CL adds a class that disables denormals only via hardware on x86
and on ARM. The class is used in APM and it is an adaption of [2].

Tested: appr.tc call on Chromium (Win, Mac)

[1] https://en.wikipedia.org/wiki/Denormal_number
[2] https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/platform/audio/denormal_disabler.h

Fixed: chromium:1227566
Change-Id: I0ed2eab55dc597529f09f93c26c7a01de051fdbe
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/227768
Reviewed-by: Magnus Flodman <mflodman@webrtc.org>
Reviewed-by: Per Ã…hgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#34701}
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 5acf693..7facd25 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -35,6 +35,7 @@
 #include "rtc_base/ref_counted_object.h"
 #include "rtc_base/time_utils.h"
 #include "rtc_base/trace_event.h"
+#include "system_wrappers/include/denormal_disabler.h"
 #include "system_wrappers/include/field_trial.h"
 #include "system_wrappers/include/metrics.h"
 
@@ -254,6 +255,8 @@
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       use_setup_specific_default_aec3_config_(
           UseSetupSpecificDefaultAec3Congfig()),
+      use_denormal_disabler_(
+          !field_trial::IsEnabled("WebRTC-ApmDenormalDisablerKillSwitch")),
       capture_runtime_settings_(RuntimeSettingQueueSize()),
       render_runtime_settings_(RuntimeSettingQueueSize()),
       capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
@@ -284,6 +287,9 @@
                    << !!submodules_.capture_post_processor
                    << "\nRender pre processor: "
                    << !!submodules_.render_pre_processor;
+  RTC_LOG(LS_INFO) << "Denormal disabler: "
+                   << (DenormalDisabler::IsSupported() ? "supported"
+                                                       : "unsupported");
 
   // Mark Echo Controller enabled if a factory is injected.
   capture_nonlocked_.echo_controller_enabled =
@@ -791,6 +797,7 @@
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
 
   MutexLock lock_capture(&mutex_capture_);
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
 
   if (aec_dump_) {
     RecordUnprocessedCaptureStream(src);
@@ -1080,6 +1087,7 @@
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
 
   MutexLock lock_capture(&mutex_capture_);
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
 
   if (aec_dump_) {
     RecordUnprocessedCaptureStream(src, input_config);
@@ -1109,6 +1117,7 @@
 int AudioProcessingImpl::ProcessCaptureStreamLocked() {
   EmptyQueuedRenderAudioLocked();
   HandleCaptureRuntimeSettings();
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
 
   // Ensure that not both the AEC and AECM are active at the same time.
   // TODO(peah): Simplify once the public API Enable functions for these
@@ -1436,6 +1445,8 @@
                                               float* const* dest) {
   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
   MutexLock lock(&mutex_render_);
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
+
   RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
   if (submodule_states_.RenderMultiBandProcessingActive() ||
       submodule_states_.RenderFullBandProcessingActive()) {
@@ -1473,6 +1484,8 @@
   RTC_DCHECK_EQ(input_config.num_frames(),
                 formats_.api_format.reverse_input_stream().num_frames());
 
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
+
   if (aec_dump_) {
     const size_t channel_size =
         formats_.api_format.reverse_input_stream().num_frames();
@@ -1497,6 +1510,8 @@
   }
 
   MutexLock lock(&mutex_render_);
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
+
   ProcessingConfig processing_config = formats_.api_format;
   processing_config.reverse_input_stream().set_sample_rate_hz(
       input_config.sample_rate_hz());
@@ -1531,6 +1546,7 @@
   AudioBuffer* render_buffer = render_.render_audio.get();  // For brevity.
 
   HandleRenderRuntimeSettings();
+  DenormalDisabler denormal_disabler(use_denormal_disabler_);
 
   if (submodules_.render_pre_processor) {
     submodules_.render_pre_processor->Process(render_buffer);