/*
 * libjingle
 * Copyright 2011 Google Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27
28#include "talk/session/media/currentspeakermonitor.h"
29
30#include "talk/base/logging.h"
31#include "talk/session/media/call.h"
32
33namespace cricket {
34
namespace {

// Level assigned to the current speaker during a short silence so that a
// pause between words does not immediately hand the floor to someone else.
const int kMaxAudioLevel = 9;

// Default hold-off applied after a speaker switch; further switches are
// suppressed until this much time has elapsed, to avoid overswitching.
const int kDefaultMinTimeBetweenSwitches = 1000;

}  // namespace
41
42CurrentSpeakerMonitor::CurrentSpeakerMonitor(Call* call, BaseSession* session)
43 : started_(false),
44 call_(call),
45 session_(session),
46 current_speaker_ssrc_(0),
47 earliest_permitted_switch_time_(0),
48 min_time_between_switches_(kDefaultMinTimeBetweenSwitches) {
49}
50
51CurrentSpeakerMonitor::~CurrentSpeakerMonitor() {
52 Stop();
53}
54
55void CurrentSpeakerMonitor::Start() {
56 if (!started_) {
57 call_->SignalAudioMonitor.connect(
58 this, &CurrentSpeakerMonitor::OnAudioMonitor);
59 call_->SignalMediaStreamsUpdate.connect(
60 this, &CurrentSpeakerMonitor::OnMediaStreamsUpdate);
61
62 started_ = true;
63 }
64}
65
66void CurrentSpeakerMonitor::Stop() {
67 if (started_) {
68 call_->SignalAudioMonitor.disconnect(this);
69 call_->SignalMediaStreamsUpdate.disconnect(this);
70
71 started_ = false;
72 ssrc_to_speaking_state_map_.clear();
73 current_speaker_ssrc_ = 0;
74 earliest_permitted_switch_time_ = 0;
75 }
76}
77
78void CurrentSpeakerMonitor::set_min_time_between_switches(
79 uint32 min_time_between_switches) {
80 min_time_between_switches_ = min_time_between_switches;
81}
82
83void CurrentSpeakerMonitor::OnAudioMonitor(Call* call, const AudioInfo& info) {
84 std::map<uint32, int> active_ssrc_to_level_map;
85 cricket::AudioInfo::StreamList::const_iterator stream_list_it;
86 for (stream_list_it = info.active_streams.begin();
87 stream_list_it != info.active_streams.end(); ++stream_list_it) {
88 uint32 ssrc = stream_list_it->first;
89 active_ssrc_to_level_map[ssrc] = stream_list_it->second;
90
91 // It's possible we haven't yet added this source to our map. If so,
92 // add it now with a "not speaking" state.
93 if (ssrc_to_speaking_state_map_.find(ssrc) ==
94 ssrc_to_speaking_state_map_.end()) {
95 ssrc_to_speaking_state_map_[ssrc] = SS_NOT_SPEAKING;
96 }
97 }
98
99 int max_level = 0;
100 uint32 loudest_speaker_ssrc = 0;
101
102 // Update the speaking states of all participants based on the new audio
103 // level information. Also retain loudest speaker.
104 std::map<uint32, SpeakingState>::iterator state_it;
105 for (state_it = ssrc_to_speaking_state_map_.begin();
106 state_it != ssrc_to_speaking_state_map_.end(); ++state_it) {
107 bool is_previous_speaker = current_speaker_ssrc_ == state_it->first;
108
109 // This uses a state machine in order to gradually identify
110 // members as having started or stopped speaking. Matches the
111 // algorithm used by the hangouts js code.
112
113 std::map<uint32, int>::const_iterator level_it =
114 active_ssrc_to_level_map.find(state_it->first);
115 // Note that the stream map only contains streams with non-zero audio
116 // levels.
117 int level = (level_it != active_ssrc_to_level_map.end()) ?
118 level_it->second : 0;
119 switch (state_it->second) {
120 case SS_NOT_SPEAKING:
121 if (level > 0) {
122 // Reset level because we don't think they're really speaking.
123 level = 0;
124 state_it->second = SS_MIGHT_BE_SPEAKING;
125 } else {
126 // State unchanged.
127 }
128 break;
129 case SS_MIGHT_BE_SPEAKING:
130 if (level > 0) {
131 state_it->second = SS_SPEAKING;
132 } else {
133 state_it->second = SS_NOT_SPEAKING;
134 }
135 break;
136 case SS_SPEAKING:
137 if (level > 0) {
138 // State unchanged.
139 } else {
140 state_it->second = SS_WAS_SPEAKING_RECENTLY1;
141 if (is_previous_speaker) {
142 // Assume this is an inter-word silence and assign him the highest
143 // volume.
144 level = kMaxAudioLevel;
145 }
146 }
147 break;
148 case SS_WAS_SPEAKING_RECENTLY1:
149 if (level > 0) {
150 state_it->second = SS_SPEAKING;
151 } else {
152 state_it->second = SS_WAS_SPEAKING_RECENTLY2;
153 if (is_previous_speaker) {
154 // Assume this is an inter-word silence and assign him the highest
155 // volume.
156 level = kMaxAudioLevel;
157 }
158 }
159 break;
160 case SS_WAS_SPEAKING_RECENTLY2:
161 if (level > 0) {
162 state_it->second = SS_SPEAKING;
163 } else {
164 state_it->second = SS_NOT_SPEAKING;
165 }
166 break;
167 }
168
169 if (level > max_level) {
170 loudest_speaker_ssrc = state_it->first;
171 max_level = level;
172 } else if (level > 0 && level == max_level && is_previous_speaker) {
173 // Favor continuity of loudest speakers if audio levels are equal.
174 loudest_speaker_ssrc = state_it->first;
175 }
176 }
177
178 // We avoid over-switching by disabling switching for a period of time after
179 // a switch is done.
180 uint32 now = talk_base::Time();
181 if (earliest_permitted_switch_time_ <= now &&
182 current_speaker_ssrc_ != loudest_speaker_ssrc) {
183 current_speaker_ssrc_ = loudest_speaker_ssrc;
184 LOG(LS_INFO) << "Current speaker changed to " << current_speaker_ssrc_;
185 earliest_permitted_switch_time_ = now + min_time_between_switches_;
186 SignalUpdate(this, current_speaker_ssrc_);
187 }
188}
189
190void CurrentSpeakerMonitor::OnMediaStreamsUpdate(Call* call,
191 Session* session,
192 const MediaStreams& added,
193 const MediaStreams& removed) {
194 if (call == call_ && session == session_) {
195 // Update the speaking state map based on added and removed streams.
196 for (std::vector<cricket::StreamParams>::const_iterator
197 it = removed.video().begin(); it != removed.video().end(); ++it) {
198 ssrc_to_speaking_state_map_.erase(it->first_ssrc());
199 }
200
201 for (std::vector<cricket::StreamParams>::const_iterator
202 it = added.video().begin(); it != added.video().end(); ++it) {
203 ssrc_to_speaking_state_map_[it->first_ssrc()] = SS_NOT_SPEAKING;
204 }
205 }
206}
207
208} // namespace cricket