blob: 5cfab267a04ab1ecda1b3ea2edcd76f95836e29d [file] [log] [blame]
henrike@webrtc.org28e20752013-07-10 00:45:36 +00001/*
2 * libjingle
3 * Copyright 2011 Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "talk/session/media/currentspeakermonitor.h"
29
buildbot@webrtc.org117afee2014-06-16 07:11:01 +000030#include "talk/media/base/streamparams.h"
31#include "talk/session/media/audiomonitor.h"
buildbot@webrtc.orga09a9992014-08-13 17:26:08 +000032#include "webrtc/base/logging.h"
henrike@webrtc.org28e20752013-07-10 00:45:36 +000033
34namespace cricket {
35
namespace {

// Hold-off applied after every speaker switch: OnAudioMonitor() will not
// switch again until this much time has elapsed, which avoids overswitching.
// NOTE(review): presumably milliseconds, matching rtc::Time() -- confirm.
const int kDefaultMinTimeBetweenSwitches = 1000;

// Level credited to the current speaker while they are in what is assumed to
// be an inter-word silence; treated as the highest volume.
const int kMaxAudioLevel = 9;

}  // namespace
42
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000043CurrentSpeakerMonitor::CurrentSpeakerMonitor(
44 AudioSourceContext* audio_source_context, BaseSession* session)
henrike@webrtc.org28e20752013-07-10 00:45:36 +000045 : started_(false),
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000046 audio_source_context_(audio_source_context),
henrike@webrtc.org28e20752013-07-10 00:45:36 +000047 session_(session),
48 current_speaker_ssrc_(0),
49 earliest_permitted_switch_time_(0),
50 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {
51}
52
53CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
54 Stop();
55}
56
57void CurrentSpeakerMonitor::Start() {
58 if (!started_) {
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000059 audio_source_context_->SignalAudioMonitor.connect(
henrike@webrtc.org28e20752013-07-10 00:45:36 +000060 this, &CurrentSpeakerMonitor::OnAudioMonitor);
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000061 audio_source_context_->SignalMediaStreamsUpdate.connect(
henrike@webrtc.org28e20752013-07-10 00:45:36 +000062 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +000063 audio_source_context_->SignalMediaStreamsReset.connect(
64 this, &CurrentSpeakerMonitor::OnMediaStreamsReset);
henrike@webrtc.org28e20752013-07-10 00:45:36 +000065
66 started_ = true;
67 }
68}
69
70void CurrentSpeakerMonitor::Stop() {
71 if (started_) {
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000072 audio_source_context_->SignalAudioMonitor.disconnect(this);
73 audio_source_context_->SignalMediaStreamsUpdate.disconnect(this);
henrike@webrtc.org28e20752013-07-10 00:45:36 +000074
75 started_ = false;
76 ssrc_to_speaking_state_map_.clear();
77 current_speaker_ssrc_ = 0;
78 earliest_permitted_switch_time_ = 0;
79 }
80}
81
82void CurrentSpeakerMonitor::set_min_time_between_switches(
Peter Boström0c4e06b2015-10-07 12:23:21 +020083 uint32_t min_time_between_switches) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +000084 min_time_between_switches_ = min_time_between_switches;
85}
86
buildbot@webrtc.orgca272362014-05-08 23:10:23 +000087void CurrentSpeakerMonitor::OnAudioMonitor(
88 AudioSourceContext* audio_source_context, const AudioInfo& info) {
Peter Boström0c4e06b2015-10-07 12:23:21 +020089 std::map<uint32_t, int> active_ssrc_to_level_map;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000090 cricket::AudioInfo::StreamList::const_iterator stream_list_it;
91 for (stream_list_it = info.active_streams.begin();
92 stream_list_it != info.active_streams.end(); ++stream_list_it) {
Peter Boström0c4e06b2015-10-07 12:23:21 +020093 uint32_t ssrc = stream_list_it->first;
henrike@webrtc.org28e20752013-07-10 00:45:36 +000094 active_ssrc_to_level_map[ssrc] = stream_list_it->second;
95
96 // It's possible we haven't yet added this source to our map. If so,
97 // add it now with a "not speaking" state.
98 if (ssrc_to_speaking_state_map_.find(ssrc) ==
99 ssrc_to_speaking_state_map_.end()) {
100 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
101 }
102 }
103
104 int max_level = 0;
Peter Boström0c4e06b2015-10-07 12:23:21 +0200105 uint32_t loudest_speaker_ssrc = 0;
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000106
107 // Update the speaking states of all participants based on the new audio
108 // level information. Also retain loudest speaker.
Peter Boström0c4e06b2015-10-07 12:23:21 +0200109 std::map<uint32_t, SpeakingState>::iterator state_it;
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000110 for (state_it = ssrc_to_speaking_state_map_.begin();
111 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
112 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
113
114 // This uses a state machine in order to gradually identify
115 // members as having started or stopped speaking. Matches the
116 // algorithm used by the hangouts js code.
117
Peter Boström0c4e06b2015-10-07 12:23:21 +0200118 std::map<uint32_t, int>::const_iterator level_it =
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000119 active_ssrc_to_level_map.find(state_it->first);
120 // Note that the stream map only contains streams with non-zero audio
121 // levels.
122 int level = (level_it != active_ssrc_to_level_map.end()) ?
123 level_it->second : 0;
124 switch (state_it->second) {
125 case SS_NOT_SPEAKING:
126 if (level > 0) {
127 // Reset level because we don't think they're really speaking.
128 level = 0;
129 state_it->second = SS_MIGHT_BE_SPEAKING;
130 } else {
131 // State unchanged.
132 }
133 break;
134 case SS_MIGHT_BE_SPEAKING:
135 if (level > 0) {
136 state_it->second = SS_SPEAKING;
137 } else {
138 state_it->second = SS_NOT_SPEAKING;
139 }
140 break;
141 case SS_SPEAKING:
142 if (level > 0) {
143 // State unchanged.
144 } else {
145 state_it->second = SS_WAS_SPEAKING_RECENTLY1;
146 if (is_previous_speaker) {
147 // Assume this is an inter-word silence and assign him the highest
148 // volume.
149 level = kMaxAudioLevel;
150 }
151 }
152 break;
153 case SS_WAS_SPEAKING_RECENTLY1:
154 if (level > 0) {
155 state_it->second = SS_SPEAKING;
156 } else {
157 state_it->second = SS_WAS_SPEAKING_RECENTLY2;
158 if (is_previous_speaker) {
159 // Assume this is an inter-word silence and assign him the highest
160 // volume.
161 level = kMaxAudioLevel;
162 }
163 }
164 break;
165 case SS_WAS_SPEAKING_RECENTLY2:
166 if (level > 0) {
167 state_it->second = SS_SPEAKING;
168 } else {
169 state_it->second = SS_NOT_SPEAKING;
170 }
171 break;
172 }
173
174 if (level > max_level) {
175 loudest_speaker_ssrc = state_it->first;
176 max_level = level;
177 } else if (level > 0 && level == max_level && is_previous_speaker) {
178 // Favor continuity of loudest speakers if audio levels are equal.
179 loudest_speaker_ssrc = state_it->first;
180 }
181 }
182
183 // We avoid over-switching by disabling switching for a period of time after
184 // a switch is done.
Peter Boström0c4e06b2015-10-07 12:23:21 +0200185 uint32_t now = rtc::Time();
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000186 if (earliest_permitted_switch_time_ <= now &&
187 current_speaker_ssrc_ != loudest_speaker_ssrc) {
188 current_speaker_ssrc_ = loudest_speaker_ssrc;
189 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
190 earliest_permitted_switch_time_ = now + min_time_between_switches_;
191 SignalUpdate(this, current_speaker_ssrc_);
192 }
193}
194
buildbot@webrtc.orgca272362014-05-08 23:10:23 +0000195void CurrentSpeakerMonitor::OnMediaStreamsUpdate(
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000196 AudioSourceContext* audio_source_context, BaseSession* session,
buildbot@webrtc.orgca272362014-05-08 23:10:23 +0000197 const MediaStreams& added, const MediaStreams& removed) {
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000198
buildbot@webrtc.orgca272362014-05-08 23:10:23 +0000199 if (audio_source_context == audio_source_context_ && session == session_) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000200 // Update the speaking state map based on added and removed streams.
201 for (std::vector<cricket::StreamParams>::const_iterator
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000202 it = removed.audio().begin(); it != removed.audio().end(); ++it) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000203 ssrc_to_speaking_state_map_.erase(it->first_ssrc());
204 }
205
206 for (std::vector<cricket::StreamParams>::const_iterator
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000207 it = added.audio().begin(); it != added.audio().end(); ++it) {
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000208 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
209 }
210 }
211}
212
buildbot@webrtc.org49a6a272014-05-21 00:24:54 +0000213void CurrentSpeakerMonitor::OnMediaStreamsReset(
214 AudioSourceContext* audio_source_context, BaseSession* session) {
215 if (audio_source_context == audio_source_context_ && session == session_) {
216 ssrc_to_speaking_state_map_.clear();
217 }
218}
219
henrike@webrtc.org28e20752013-07-10 00:45:36 +0000220} // namespace cricket