blob: bbb33e3143c6cc21eab60ded40e4ea35fdbc276b [file] [log] [blame]
/*
 * Copyright 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
kjellander@webrtc.org9b8df252016-02-12 06:47:59 +010011#include "webrtc/pc/currentspeakermonitor.h"
henrike@webrtc.org28e20752013-07-10 00:45:36 +000012
buildbot@webrtc.orga09a9992014-08-13 17:26:08 +000013#include "webrtc/base/logging.h"
kjellandera96e2d72016-02-04 23:52:28 -080014#include "webrtc/media/base/streamparams.h"
kjellander@webrtc.org9b8df252016-02-12 06:47:59 +010015#include "webrtc/pc/audiomonitor.h"
henrike@webrtc.org28e20752013-07-10 00:45:36 +000016
17namespace cricket {
18
namespace {

// Default hold-off applied after a speaker switch, during which no further
// switch is permitted; this keeps the reported speaker from flapping.
// NOTE(review): the value is added to rtc::Time(), so it is presumably in
// milliseconds -- confirm against the time utility's documentation.
const int kDefaultMinTimeBetweenSwitches = 1000;

// Audio level credited to the current speaker while they are briefly silent
// (treated as an inter-word pause), i.e. the highest volume.
const int kMaxAudioLevel = 9;

}  // namespace
25
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000026CurrentSpeakerMonitor::CurrentSpeakerMonitor(
deadbeefd59daf82015-10-14 15:02:44 -070027 AudioSourceContext* audio_source_context)
henrike@webrtc.org28e20752013-07-10 00:45:36 +000028 : started_(false),
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000029 audio_source_context_(audio_source_context),
henrike@webrtc.org28e20752013-07-10 00:45:36 +000030 current_speaker_ssrc_(0),
31 earliest_permitted_switch_time_(0),
deadbeefd59daf82015-10-14 15:02:44 -070032 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {}
henrike@webrtc.org28e20752013-07-10 00:45:36 +000033
34CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
35 Stop();
36}
37
38void CurrentSpeakerMonitor::Start() {
39 if (!started_) {
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000040 audio_source_context_->SignalAudioMonitor.connect(
henrike@webrtc.org28e20752013-07-10 00:45:36 +000041 this, &CurrentSpeakerMonitor::OnAudioMonitor);
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000042 audio_source_context_->SignalMediaStreamsUpdate.connect(
henrike@webrtc.org28e20752013-07-10 00:45:36 +000043 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +000044 audio_source_context_->SignalMediaStreamsReset.connect(
45 this, &CurrentSpeakerMonitor::OnMediaStreamsReset);
henrike@webrtc.org28e20752013-07-10 00:45:36 +000046
47 started_ = true;
48 }
49}
50
51void CurrentSpeakerMonitor::Stop() {
52 if (started_) {
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000053 audio_source_context_->SignalAudioMonitor.disconnect(this);
54 audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);
henrike@webrtc.org28e20752013-07-10 00:45:36 +000055
56 started_ = false;
57 ssrc_to_speaking_state_map_.clear();
58 current_speaker_ssrc_ = 0;
59 earliest_permitted_switch_time_ = 0;
60 }
61}
62
63void CurrentSpeakerMonitor::set_min_time_between_switches(
Peter Boström0c4e06b2015-10-07 12:23:21 +020064 uint32_t min_time_between_switches) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +000065 min_time_between_switches_ = min_time_between_switches;
66}
67
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000068void CurrentSpeakerMonitor::OnAudioMonitor(
69 AudioSourceContext* audio_source_context, const AudioInfo& info) {
Peter Boström0c4e06b2015-10-07 12:23:21 +020070 std::map<uint32_t, int> active_ssrc_to_level_map;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000071 cricket::AudioInfo::StreamList::const_iterator stream_list_it;
72 for (stream_list_it = info.active_streams.begin();
73 stream_list_it != info.active_streams.end(); ++stream_list_it) {
Peter Boström0c4e06b2015-10-07 12:23:21 +020074 uint32_t ssrc = stream_list_it->first;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000075 active_ssrc_to_level_map[ssrc] = stream_list_it->second;
76
77 // It's possible we haven't yet added this source to our map. If so,
78 // add it now with a "not speaking" state.
79 if (ssrc_to_speaking_state_map_.find(ssrc) ==
80 ssrc_to_speaking_state_map_.end()) {
81 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
82 }
83 }
84
85 int max_level = 0;
Peter Boström0c4e06b2015-10-07 12:23:21 +020086 uint32_t loudest_speaker_ssrc = 0;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000087
88 // Update the speaking states of all participants based on the new audio
89 // level information. Also retain loudest speaker.
Peter Boström0c4e06b2015-10-07 12:23:21 +020090 std::map<uint32_t, SpeakingState>::iterator state_it;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000091 for (state_it = ssrc_to_speaking_state_map_.begin();
92 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
93 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
94
95 // This uses a state machine in order to gradually identify
96 // members as having started or stopped speaking. Matches the
97 // algorithm used by the hangouts js code.
98
Peter Boström0c4e06b2015-10-07 12:23:21 +020099 std::map<uint32_t, int>::const_iterator level_it =
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000100 active_ssrc_to_level_map.find(state_it->first);
101 // Note that the stream map only contains streams with non-zero audio
102 // levels.
103 int level = (level_it != active_ssrc_to_level_map.end()) ?
104 level_it->second : 0;
105 switch (state_it->second) {
106 case SS_NOT_SPEAKING:
107 if (level > 0) {
108 // Reset level because we don't think they're really speaking.
109 level = 0;
110 state_it->second = SS_MIGHT_BE_SPEAKING;
111 } else {
112 // State unchanged.
113 }
114 break;
115 case SS_MIGHT_BE_SPEAKING:
116 if (level > 0) {
117 state_it->second = SS_SPEAKING;
118 } else {
119 state_it->second = SS_NOT_SPEAKING;
120 }
121 break;
122 case SS_SPEAKING:
123 if (level > 0) {
124 // State unchanged.
125 } else {
126 state_it->second = SS_WAS_SPEAKING_RECENTLY1;
127 if (is_previous_speaker) {
128 // Assume this is an inter-word silence and assign him the highest
129 // volume.
130 level = kMaxAudioLevel;
131 }
132 }
133 break;
134 case SS_WAS_SPEAKING_RECENTLY1:
135 if (level > 0) {
136 state_it->second = SS_SPEAKING;
137 } else {
138 state_it->second = SS_WAS_SPEAKING_RECENTLY2;
139 if (is_previous_speaker) {
140 // Assume this is an inter-word silence and assign him the highest
141 // volume.
142 level = kMaxAudioLevel;
143 }
144 }
145 break;
146 case SS_WAS_SPEAKING_RECENTLY2:
147 if (level > 0) {
148 state_it->second = SS_SPEAKING;
149 } else {
150 state_it->second = SS_NOT_SPEAKING;
151 }
152 break;
153 }
154
155 if (level > max_level) {
156 loudest_speaker_ssrc = state_it->first;
157 max_level = level;
158 } else if (level > 0 && level == max_level && is_previous_speaker) {
159 // Favor continuity of loudest speakers if audio levels are equal.
160 loudest_speaker_ssrc = state_it->first;
161 }
162 }
163
164 // We avoid over-switching by disabling switching for a period of time after
165 // a switch is done.
Peter Boström0c4e06b2015-10-07 12:23:21 +0200166 uint32_t now = rtc::Time();
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000167 if (earliest_permitted_switch_time_ <= now &&
168 current_speaker_ssrc_ != loudest_speaker_ssrc) {
169 current_speaker_ssrc_ = loudest_speaker_ssrc;
170 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
171 earliest_permitted_switch_time_ = now + min_time_between_switches_;
172 SignalUpdate(this, current_speaker_ssrc_);
173 }
174}
175
buildbot@webrtc.orgca272362014-05-08 23:10:23 +0000176void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
deadbeefd59daf82015-10-14 15:02:44 -0700177 AudioSourceContext* audio_source_context,
178 const MediaStreams& added,
179 const MediaStreams& removed) {
180 if (audio_source_context == audio_source_context_) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000181 // Update the speaking state map based on added and removed streams.
182 for (std::vector<cricket::StreamParams>::const_iterator
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000183 it = removed.audio().begin(); it != removed.audio().end(); ++it) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000184 ssrc_to_speaking_state_map_.erase(it->first_ssrc());
185 }
186
187 for (std::vector<cricket::StreamParams>::const_iterator
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000188 it = added.audio().begin(); it != added.audio().end(); ++it) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000189 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
190 }
191 }
192}
193
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000194void CurrentSpeakerMonitor::OnMediaStreamsReset(
deadbeefd59daf82015-10-14 15:02:44 -0700195 AudioSourceContext* audio_source_context) {
196 if (audio_source_context == audio_source_context_) {
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000197 ssrc_to_speaking_state_map_.clear();
198 }
199}
200
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000201} // namespace cricket