Revert "Reland "Remove unused APM voice activity detection sub-module"" This reverts commit 54d1344d985b00d4d1580dd18057d4618c11ad1f. Reason for revert: Breaks chromium roll, see https://ci.chromium.org/ui/p/chromium/builders/try/linux_chromium_tsan_rel_ng/1080583/overview https://chromium-review.googlesource.com/c/chromium/src/+/3461512 Original change's description: > Reland "Remove unused APM voice activity detection sub-module" > > This reverts commit a751f167c68343f76528436defdbc61600a8d7b3. > > Reason for revert: dependency in a downstream project removed > > Original change's description: > > Revert "Remove unused APM voice activity detection sub-module" > > > > This reverts commit b4e06d032e6f82a65c52ed0c5364ae9e7c0a0215. > > > > Reason for revert: breaking downstream projects > > > > Original change's description: > > > Remove unused APM voice activity detection sub-module > > > > > > API changes: > > > - webrtc::AudioProcessing::Config::VoiceDetection removed > > > - webrtc::AudioProcessingStats::voice_detected deprecated > > > - cricket::AudioOptions::typing_detection deprecated > > > - webrtc::StatsReport::StatsValueName:: > > > kStatsValueNameTypingNoiseState deprecated > > > > > > PSA: https://groups.google.com/g/discuss-webrtc/c/7X6uwmJarE0 > > > > > > Bug: webrtc:11226,webrtc:11292 > > > Change-Id: I8d008b56708cf62961b9857ec052b59fda3b41bf > > > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/250666 > > > Reviewed-by: Harald Alvestrand <hta@webrtc.org> > > > Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org> > > > Reviewed-by: Sam Zackrisson <saza@webrtc.org> > > > Reviewed-by: Björn Terelius <terelius@webrtc.org> > > > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> > > > Cr-Commit-Position: refs/heads/main@{#35975} > > > > TBR=gustaf@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,terelius@webrtc.org,hta@webrtc.org,webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com > > > > Change-Id: Iee01fdb874b4e0331277f3ffe60dacaabc3859a2 > > No-Presubmit: true > > No-Tree-Checks: true > > No-Try: true > > Bug: webrtc:11226,webrtc:11292 > > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/251600 > > Reviewed-by: Harald Alvestrand <hta@webrtc.org> > > Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org> > > Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> > > Cr-Commit-Position: refs/heads/main@{#35977} > > # Not skipping CQ checks because this is a reland. > > Bug: webrtc:11226,webrtc:11292 > Change-Id: I2fcbc5fdade16bfe6a0f0a02841a33a598d4f2ad > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/251660 > Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> > Reviewed-by: Harald Alvestrand <hta@webrtc.org> > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> > Cr-Commit-Position: refs/heads/main@{#35984} TBR=mbonadei@webrtc.org,gustaf@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,terelius@webrtc.org,hta@webrtc.org,webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com Change-Id: Ib308a3af2dcce85a0074ef5a4680ccec3f82712f No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: webrtc:11226,webrtc:11292 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/251688 Reviewed-by: Henrik Boström <hbos@webrtc.org> Bot-Commit: rubber-stamper@appspot.gserviceaccount.com <rubber-stamper@appspot.gserviceaccount.com> Auto-Submit: Henrik Boström <hbos@webrtc.org> Reviewed-by: Harald Alvestrand <hta@webrtc.org> Commit-Queue: Harald Alvestrand <hta@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35990}

commit: 09aaf6f7bcfb4da644bd86c76896a04a41f776e1 [log] [tgz]
author: Henrik Boström <hbos@webrtc.org> Mon Feb 14 12:02:45 2022 +0000
committer: WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com> Mon Feb 14 12:25:51 2022 +0000
tree: 1f92f3f3b96634f779b4e9f0d06a96223154fd1c
parent: eb6c6fcf275ae3e60172b4b781b0ba75715840bb [diff] [blame]
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index b21a022..96e2d84 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc

@@ -190,6 +190,7 @@
   apm_config.noise_suppression.enabled = true;
 
   apm_config.high_pass_filter.enabled = true;
+  apm_config.voice_detection.enabled = true;
   apm_config.pipeline.maximum_internal_processing_rate = 48000;
   ap->ApplyConfig(apm_config);
 }
@@ -1225,6 +1226,7 @@
   EXPECT_FALSE(config.high_pass_filter.enabled);
   EXPECT_FALSE(config.gain_controller1.enabled);
   EXPECT_FALSE(config.noise_suppression.enabled);
+  EXPECT_FALSE(config.voice_detection.enabled);
 }
 
 TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
@@ -1365,6 +1367,48 @@
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_->ApplyConfig(apm_config);
 
+  // 3. Only GetStatistics-reporting VAD is enabled...
+  SetFrameTo(&frame_, 1000);
+  frame_copy.CopyFrom(frame_);
+  apm_config.voice_detection.enabled = true;
+  apm_->ApplyConfig(apm_config);
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
+  apm_config.voice_detection.enabled = false;
+  apm_->ApplyConfig(apm_config);
+
+  // 4. The VAD is enabled...
+  SetFrameTo(&frame_, 1000);
+  frame_copy.CopyFrom(frame_);
+  apm_config.voice_detection.enabled = true;
+  apm_->ApplyConfig(apm_config);
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->ProcessStream(
+                frame_.data.data(),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
+                frame_.data.data()));
+  EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
+  apm_config.voice_detection.enabled = false;
+  apm_->ApplyConfig(apm_config);
+
   // Check the test is valid. We should have distortion from the filter
   // when AEC is enabled (which won't affect the audio).
   apm_config.echo_canceller.enabled = true;
@@ -1692,6 +1736,7 @@
          static_cast<size_t>(test->num_reverse_channels()), true);
 
     int frame_count = 0;
+    int has_voice_count = 0;
     int analog_level = 127;
     int analog_level_average = 0;
     int max_output_average = 0;
@@ -1727,6 +1772,8 @@
       analog_level = apm_->recommended_stream_analog_level();
       analog_level_average += analog_level;
       AudioProcessingStats stats = apm_->GetStatistics();
+      EXPECT_TRUE(stats.voice_detected);
+      has_voice_count += *stats.voice_detected ? 1 : 0;
 
       size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
       size_t write_count =
@@ -1782,23 +1829,33 @@
 
     if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
       const int kIntNear = 1;
-      // All numbers being consistently higher on N7 compare to the reference
-      // data.
+      // When running the test on a N7 we get a {2, 6} difference of
+      // `has_voice_count` and `max_output_average` is up to 18 higher.
+      // All numbers being consistently higher on N7 compare to ref_data.
       // TODO(bjornv): If we start getting more of these offsets on Android we
       // should consider a different approach. Either using one slack for all,
       // or generate a separate android reference.
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
+      const int kHasVoiceCountOffset = 3;
+      const int kHasVoiceCountNear = 8;
       const int kMaxOutputAverageOffset = 9;
       const int kMaxOutputAverageNear = 26;
 #else
+      const int kHasVoiceCountOffset = 0;
+      const int kHasVoiceCountNear = kIntNear;
       const int kMaxOutputAverageOffset = 0;
       const int kMaxOutputAverageNear = kIntNear;
 #endif
+      EXPECT_NEAR(test->has_voice_count(),
+                  has_voice_count - kHasVoiceCountOffset, kHasVoiceCountNear);
+
       EXPECT_NEAR(test->analog_level_average(), analog_level_average, kIntNear);
       EXPECT_NEAR(test->max_output_average(),
                   max_output_average - kMaxOutputAverageOffset,
                   kMaxOutputAverageNear);
     } else {
+      test->set_has_voice_count(has_voice_count);
+
       test->set_analog_level_average(analog_level_average);
       test->set_max_output_average(max_output_average);
     }
@@ -2628,6 +2685,7 @@
   apm_config.echo_canceller.enabled = true;
   apm_config.echo_canceller.mobile_mode = mobile_aec;
   apm_config.noise_suppression.enabled = false;
+  apm_config.voice_detection.enabled = false;
   apm->ApplyConfig(apm_config);
   return apm;
 }
@@ -2736,9 +2794,10 @@
   EXPECT_FALSE(stats.echo_return_loss_enhancement.has_value());
 }
 
-TEST(ApmStatistics, DoNotReportVoiceDetectedStat) {
+TEST(ApmStatistics, ReportHasVoice) {
   ProcessingConfig processing_config = {
       {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
+  AudioProcessing::Config config;
 
   // Set up an audioframe.
   Int16FrameData frame;
@@ -2755,14 +2814,37 @@
       AudioProcessingBuilderForTesting().Create();
   apm->Initialize(processing_config);
 
-  // No metric should be reported.
+  // If not enabled, no metric should be reported.
   EXPECT_EQ(
       apm->ProcessStream(frame.data.data(),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
                          frame.data.data()),
       0);
-  EXPECT_FALSE(apm->GetStatistics().voice_detected.has_value());
+  EXPECT_FALSE(apm->GetStatistics().voice_detected);
+
+  // If enabled, metrics should be reported.
+  config.voice_detection.enabled = true;
+  apm->ApplyConfig(config);
+  EXPECT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  auto stats = apm->GetStatistics();
+  EXPECT_TRUE(stats.voice_detected);
+
+  // If re-disabled, the value is again not reported.
+  config.voice_detection.enabled = false;
+  apm->ApplyConfig(config);
+  EXPECT_EQ(
+      apm->ProcessStream(frame.data.data(),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
+                         frame.data.data()),
+      0);
+  EXPECT_FALSE(apm->GetStatistics().voice_detected);
 }
 
 TEST(ApmStatistics, GetStatisticsReportsNoEchoDetectorStatsWhenDisabled) {
commit	09aaf6f7bcfb4da644bd86c76896a04a41f776e1	[log] [tgz]
author	Henrik Boström <hbos@webrtc.org>	Mon Feb 14 12:02:45 2022 +0000
committer	WebRTC LUCI CQ <webrtc-scoped@luci-project-accounts.iam.gserviceaccount.com>	Mon Feb 14 12:25:51 2022 +0000
tree	1f92f3f3b96634f779b4e9f0d06a96223154fd1c
parent	eb6c6fcf275ae3e60172b4b781b0ba75715840bb [diff] [blame]