Blame - audio/utility/channel_mixer_unittest.cc - webrtc.googlesource.com/src

blob: 94cb1ac7e3e8961711221dd8624e325a023a8108 [file] [log] [blame]

henrika	2250b05	2019-07-04 11:27:52 +0200	[diff] [blame]	1	/*
				2	* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Jonas Olsson	a4d8737	2019-07-05 19:08:33 +0200	[diff] [blame]	11	#include "audio/utility/channel_mixer.h"
				12
henrika	2250b05	2019-07-04 11:27:52 +0200	[diff] [blame]	13	#include <memory>
				14
				15	#include "api/audio/audio_frame.h"
				16	#include "api/audio/channel_layout.h"
henrika	2250b05	2019-07-04 11:27:52 +0200	[diff] [blame]	17	#include "audio/utility/channel_mixing_matrix.h"
				18	#include "rtc_base/arraysize.h"
				19	#include "rtc_base/strings/string_builder.h"
				20	#include "test/gtest.h"
				21
				22	namespace webrtc {
				23
				24	namespace {
				25
				// Timestamp value handed to AudioFrame::UpdateFrame() by the tests below.
				constexpr uint32_t kTimestamp = 27;
				// Sample rate used for every frame created by the fixture.
				constexpr int kSampleRateHz = 16000;
				// The tests operate on 10 ms frames, so the rate must yield a whole number
				// of samples per channel; fail the build instead of silently truncating.
				static_assert(kSampleRateHz % 100 == 0,
				              "kSampleRateHz must be divisible by 100 to form 10 ms frames");
				// Number of samples per channel in one 10 ms frame.
				constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
				29
				30	class ChannelMixerTest : public ::testing::Test {
				31	protected:
				32	ChannelMixerTest() {
				33	// Use 10ms audio frames by default. Don't set values yet.
				34	frame_.samples_per_channel_ = kSamplesPerChannel;
				35	frame_.sample_rate_hz_ = kSampleRateHz;
				36	EXPECT_TRUE(frame_.muted());
				37	}
				38
				39	virtual ~ChannelMixerTest() {}
				40
				41	AudioFrame frame_;
				42	};
				43
				44	void SetFrameData(int16_t data, AudioFrame* frame) {
				45	int16_t* frame_data = frame->mutable_data();
				46	for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
				47	i++) {
				48	frame_data[i] = data;
				49	}
				50	}
				51
				52	void SetMonoData(int16_t center, AudioFrame* frame) {
				53	frame->num_channels_ = 1;
				54	int16_t* frame_data = frame->mutable_data();
				55	for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
				56	frame_data[i] = center;
				57	}
				58	EXPECT_FALSE(frame->muted());
				59	}
				60
				61	void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
				62	ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
				63	frame->num_channels_ = 2;
				64	int16_t* frame_data = frame->mutable_data();
				65	for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
				66	frame_data[i] = left;
				67	frame_data[i + 1] = right;
				68	}
				69	EXPECT_FALSE(frame->muted());
				70	}
				71
				72	void SetFiveOneData(int16_t front_left,
				73	int16_t front_right,
				74	int16_t center,
				75	int16_t lfe,
				76	int16_t side_left,
				77	int16_t side_right,
				78	AudioFrame* frame) {
				79	ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
				80	frame->num_channels_ = 6;
				81	int16_t* frame_data = frame->mutable_data();
				82	for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
				83	frame_data[i] = front_left;
				84	frame_data[i + 1] = front_right;
				85	frame_data[i + 2] = center;
				86	frame_data[i + 3] = lfe;
				87	frame_data[i + 4] = side_left;
				88	frame_data[i + 5] = side_right;
				89	}
				90	EXPECT_FALSE(frame->muted());
				91	}
				92
				93	void SetSevenOneData(int16_t front_left,
				94	int16_t front_right,
				95	int16_t center,
				96	int16_t lfe,
				97	int16_t side_left,
				98	int16_t side_right,
				99	int16_t back_left,
				100	int16_t back_right,
				101	AudioFrame* frame) {
				102	ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
				103	frame->num_channels_ = 8;
				104	int16_t* frame_data = frame->mutable_data();
				105	for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
				106	frame_data[i] = front_left;
				107	frame_data[i + 1] = front_right;
				108	frame_data[i + 2] = center;
				109	frame_data[i + 3] = lfe;
				110	frame_data[i + 4] = side_left;
				111	frame_data[i + 5] = side_right;
				112	frame_data[i + 6] = back_left;
				113	frame_data[i + 7] = back_right;
				114	}
				115	EXPECT_FALSE(frame->muted());
				116	}
				117
				118	bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
				119	const int16_t* frame_data = frame->data();
				120	for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
				121	i++) {
				122	if (frame_data[i] != sample) {
				123	return false;
				124	}
				125	}
				126	return true;
				127	}
				128
				129	void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
				130	EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
				131	EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
				132	const int16_t* frame1_data = frame1.data();
				133	const int16_t* frame2_data = frame2.data();
				134	for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
				135	i++) {
				136	EXPECT_EQ(frame1_data[i], frame2_data[i]);
				137	}
				138	EXPECT_EQ(frame1.muted(), frame2.muted());
				139	}
				140
				141	} // namespace
				142
				143	// Test all possible layout conversions can be constructed and mixed. Don't
				144	// care about the actual content, simply run through all mixing combinations
				145	// and ensure that nothing fails.
				146	TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
				147	for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
				148	input_layout <= CHANNEL_LAYOUT_MAX;
				149	input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
				150	for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
				151	output_layout <= CHANNEL_LAYOUT_MAX;
				152	output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
				153	// DISCRETE, BITSTREAM can't be tested here based on the current approach.
				154	// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
				155	// Stereo down mix should never be the output layout.
				156	if (input_layout == CHANNEL_LAYOUT_BITSTREAM \|\|
				157	input_layout == CHANNEL_LAYOUT_DISCRETE \|\|
				158	input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC \|\|
				159	output_layout == CHANNEL_LAYOUT_BITSTREAM \|\|
				160	output_layout == CHANNEL_LAYOUT_DISCRETE \|\|
				161	output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC \|\|
				162	output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
				163	continue;
				164	}
				165
				166	rtc::StringBuilder ss;
				167	ss << "Input Layout: " << input_layout
				168	<< ", Output Layout: " << output_layout;
				169	SCOPED_TRACE(ss.str());
				170	ChannelMixer mixer(input_layout, output_layout);
				171
				172	frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
				173	AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
				174	ChannelLayoutToChannelCount(input_layout));
				175	EXPECT_TRUE(frame_.muted());
				176	mixer.Transform(&frame_);
				177	}
				178	}
				179	}
				180
				181	// Ensure that the audio frame is untouched when input and output channel
				182	// layouts are identical, i.e., the transformation should have no effect.
				183	// Exclude invalid mixing combinations.
				184	TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
				185	for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
				186	input_layout <= CHANNEL_LAYOUT_MAX;
				187	input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
				188	for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
				189	output_layout <= CHANNEL_LAYOUT_MAX;
				190	output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
				191	if (input_layout != output_layout \|\|
				192	input_layout == CHANNEL_LAYOUT_BITSTREAM \|\|
				193	input_layout == CHANNEL_LAYOUT_DISCRETE \|\|
				194	input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC \|\|
				195	output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
				196	continue;
				197	}
				198	ChannelMixer mixer(input_layout, output_layout);
				199	frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
				200	SetFrameData(99, &frame_);
				201	mixer.Transform(&frame_);
				202	EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
				203	static_cast<int>(frame_.num_channels()));
				204	EXPECT_TRUE(AllSamplesEquals(99, &frame_));
				205	}
				206	}
				207	}
				208
				209	TEST_F(ChannelMixerTest, StereoToMono) {
				210	ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
				211	//
				212	// Input: stereo
				213	// LEFT RIGHT
				214	// Output: mono CENTER 0.5 0.5
				215	//
				216	SetStereoData(7, 3, &frame_);
				217	EXPECT_EQ(2u, frame_.num_channels());
				218	mixer.Transform(&frame_);
				219	EXPECT_EQ(1u, frame_.num_channels());
				220	EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
				221
				222	AudioFrame mono_frame;
				223	mono_frame.samples_per_channel_ = frame_.samples_per_channel();
				224	SetMonoData(5, &mono_frame);
				225	VerifyFramesAreEqual(mono_frame, frame_);
				226
				227	SetStereoData(-32768, -32768, &frame_);
				228	EXPECT_EQ(2u, frame_.num_channels());
				229	mixer.Transform(&frame_);
				230	EXPECT_EQ(1u, frame_.num_channels());
				231	EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
				232	SetMonoData(-32768, &mono_frame);
				233	VerifyFramesAreEqual(mono_frame, frame_);
				234	}
				235
				236	TEST_F(ChannelMixerTest, StereoToMonoMuted) {
				237	ASSERT_TRUE(frame_.muted());
				238	ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
				239	mixer.Transform(&frame_);
				240	EXPECT_EQ(1u, frame_.num_channels());
				241	EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
				242	EXPECT_TRUE(frame_.muted());
				243	}
				244
				245	TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
				246	ASSERT_TRUE(frame_.muted());
				247	ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
				248	mixer.Transform(&frame_);
				249	EXPECT_EQ(8u, frame_.num_channels());
				250	EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
				251	EXPECT_TRUE(frame_.muted());
				252	}
				253
				254	TEST_F(ChannelMixerTest, FiveOneToMono) {
				255	ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
				256	//
				257	// Input: 5.1
				258	// LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
				259	// Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707
				260	//
				261	// a = [10, 20, 15, 2, 5, 5]
				262	// b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
				263	// a * b (dot product) = 44.69848480983499,
				264	// which is truncated into 44 using 16 bit representation.
				265	//
				266	SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
				267	EXPECT_EQ(6u, frame_.num_channels());
				268	mixer.Transform(&frame_);
				269	EXPECT_EQ(1u, frame_.num_channels());
				270	EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
				271
				272	AudioFrame mono_frame;
				273	mono_frame.samples_per_channel_ = frame_.samples_per_channel();
				274	SetMonoData(44, &mono_frame);
				275	VerifyFramesAreEqual(mono_frame, frame_);
				276
				277	SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
				278	EXPECT_EQ(6u, frame_.num_channels());
				279	mixer.Transform(&frame_);
				280	EXPECT_EQ(1u, frame_.num_channels());
				281	EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
				282	SetMonoData(-32768, &mono_frame);
				283	VerifyFramesAreEqual(mono_frame, frame_);
				284	}
				285
				286	TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
				287	ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
				288	//
				289	// Input: 5.1
				290	// LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
				291	// Output: 7.1 LEFT 1 0 0 0 0 0
				292	// RIGHT 0 1 0 0 0 0
				293	// CENTER 0 0 1 0 0 0
				294	// LFE 0 0 0 1 0 0
				295	// SIDE_LEFT 0 0 0 0 1 0
				296	// SIDE_RIGHT 0 0 0 0 0 1
				297	// BACK_LEFT 0 0 0 0 0 0
				298	// BACK_RIGHT 0 0 0 0 0 0
				299	//
				300	SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
				301	EXPECT_EQ(6u, frame_.num_channels());
				302	mixer.Transform(&frame_);
				303	EXPECT_EQ(8u, frame_.num_channels());
				304	EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
				305
				306	AudioFrame seven_one_frame;
				307	seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
				308	SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
				309	VerifyFramesAreEqual(seven_one_frame, frame_);
				310
				311	SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
				312	EXPECT_EQ(6u, frame_.num_channels());
				313	mixer.Transform(&frame_);
				314	EXPECT_EQ(8u, frame_.num_channels());
				315	EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
				316	SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
				317	&seven_one_frame);
				318	VerifyFramesAreEqual(seven_one_frame, frame_);
				319	}
				320
				321	TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
				322	ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
				323	//
				324	// Input: 5.1
				325	// LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT
				326	// Output: stereo LEFT 1 0 0.707 0.707 0.707 0
				327	// RIGHT 0 1 0.707 0.707 0 0.707
				328	//
				329	SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
				330	EXPECT_EQ(6u, frame_.num_channels());
				331	mixer.Transform(&frame_);
				332	EXPECT_EQ(2u, frame_.num_channels());
				333	EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
				334
				335	AudioFrame stereo_frame;
				336	stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
				337	SetStereoData(35, 45, &stereo_frame);
				338	VerifyFramesAreEqual(stereo_frame, frame_);
				339
				340	SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
				341	EXPECT_EQ(6u, frame_.num_channels());
				342	mixer.Transform(&frame_);
				343	EXPECT_EQ(2u, frame_.num_channels());
				344	EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
				345	SetStereoData(-32768, -32768, &stereo_frame);
				346	VerifyFramesAreEqual(stereo_frame, frame_);
				347	}
				348
				349	TEST_F(ChannelMixerTest, MonoToStereo) {
				350	ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
				351	//
				352	// Input: mono
				353	// CENTER
				354	// Output: stereo LEFT 1
				355	// RIGHT 1
				356	//
				357	SetMonoData(44, &frame_);
				358	EXPECT_EQ(1u, frame_.num_channels());
				359	mixer.Transform(&frame_);
				360	EXPECT_EQ(2u, frame_.num_channels());
				361	EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
				362
				363	AudioFrame stereo_frame;
				364	stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
				365	SetStereoData(44, 44, &stereo_frame);
				366	VerifyFramesAreEqual(stereo_frame, frame_);
				367	}
				368
				369	TEST_F(ChannelMixerTest, StereoToFiveOne) {
				370	ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
				371	//
				372	// Input: Stereo
				373	// LEFT RIGHT
				374	// Output: 5.1 LEFT 1 0
				375	// RIGHT 0 1
				376	// CENTER 0 0
				377	// LFE 0 0
				378	// SIDE_LEFT 0 0
				379	// SIDE_RIGHT 0 0
				380	//
				381	SetStereoData(50, 60, &frame_);
				382	EXPECT_EQ(2u, frame_.num_channels());
				383	mixer.Transform(&frame_);
				384	EXPECT_EQ(6u, frame_.num_channels());
				385	EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());
				386
				387	AudioFrame five_one_frame;
				388	five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
				389	SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
				390	VerifyFramesAreEqual(five_one_frame, frame_);
				391	}
				392
				393	} // namespace webrtc