Blame - webrtc/modules/audio_coding/neteq4/time_stretch.h - webrtc.googlesource.com/src

blob: e701e26684919fbf74df4552fb9212c27aa39574 [file] [log] [blame]

henrik.lundin@webrtc.org	d94659d	2013-01-29 12:09:21 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_
				12	#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_
				13
				14	#include <assert.h>
				15
				16	#include <cstring> // memset, size_t
				17
				18	#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
				19	#include "webrtc/system_wrappers/interface/constructor_magic.h"
				20	#include "webrtc/typedefs.h"
				21
				22	namespace webrtc {
				23
				24	// Forward declarations.
				25	class BackgroundNoise;
				26
				27	// This is the base class for Accelerate and PreemptiveExpand. This class
				28	// cannot be instantiated, but must be used through either of the derived
				29	// classes.
				30	class TimeStretch {
				31	public:
				32	enum ReturnCodes {
				33	kSuccess = 0,
				34	kSuccessLowEnergy = 1,
				35	kNoStretch = 2,
				36	kError = -1
				37	};
				38
				39	TimeStretch(int sample_rate_hz, size_t num_channels,
				40	const BackgroundNoise& background_noise)
				41	: sample_rate_hz_(sample_rate_hz),
				42	fs_mult_(sample_rate_hz / 8000),
				43	num_channels_(num_channels),
				44	master_channel_(0), // First channel is master.
				45	background_noise_(background_noise),
				46	max_input_value_(0) {
				47	assert(sample_rate_hz_ == 8000 \|\|
				48	sample_rate_hz_ == 16000 \|\|
				49	sample_rate_hz_ == 32000 \|\|
				50	sample_rate_hz_ == 48000);
				51	assert(num_channels_ > 0);
				52	assert(static_cast<int>(master_channel_) < num_channels_);
				53	memset(auto_correlation_, 0, sizeof(auto_correlation_));
				54	}
				55
				56	virtual ~TimeStretch() {}
				57
				58	// This method performs the processing common to both Accelerate and
				59	// PreemptiveExpand.
				60	ReturnCodes Process(const int16_t* input,
				61	size_t input_len,
				62	AudioMultiVector<int16_t>* output,
				63	int16_t* length_change_samples);
				64
				65	protected:
				66	// Sets the parameters \|best_correlation\| and \|peak_index\| to suitable
				67	// values when the signal contains no active speech. This method must be
				68	// implemented by the sub-classes.
				69	virtual void SetParametersForPassiveSpeech(int input_length,
				70	int16_t* best_correlation,
				71	int* peak_index) const = 0;
				72
				73	// Checks the criteria for performing the time-stretching operation and,
				74	// if possible, performs the time-stretching. This method must be implemented
				75	// by the sub-classes.
				76	virtual ReturnCodes CheckCriteriaAndStretch(
				77	const int16_t* input, int input_length, size_t peak_index,
				78	int16_t best_correlation, bool active_speech,
				79	AudioMultiVector<int16_t>* output) const = 0;
				80
				81	static const int kCorrelationLen = 50;
				82	static const int kLogCorrelationLen = 6; // >= log2(kCorrelationLen).
				83	static const int kMinLag = 10;
				84	static const int kMaxLag = 60;
				85	static const int kDownsampledLen = kCorrelationLen + kMaxLag;
				86	static const int kCorrelationThreshold = 14746; // 0.9 in Q14.
				87
				88	const int sample_rate_hz_;
				89	const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000.
				90	const int num_channels_;
				91	const size_t master_channel_;
				92	const BackgroundNoise& background_noise_;
				93	int16_t max_input_value_;
				94	int16_t downsampled_input_[kDownsampledLen];
				95	// Adding 1 to the size of \|auto_correlation_\| because of how it is used
				96	// by the peak-detection algorithm.
				97	int16_t auto_correlation_[kCorrelationLen + 1];
				98
				99	private:
				100	// Calculates the auto-correlation of \|downsampled_input_\| and writes the
				101	// result to \|auto_correlation_\|.
				102	void AutoCorrelation();
				103
				104	// Performs a simple voice-activity detection based on the input parameters.
				105	bool SpeechDetection(int32_t vec1_energy, int32_t vec2_energy,
				106	int peak_index, int scaling) const;
				107
				108	DISALLOW_COPY_AND_ASSIGN(TimeStretch);
				109	};
				110
				111	} // namespace webrtc
				112	#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_