blob: da059cb5a243ebb7f5196a9a6e8860cb05842038 [file] [log] [blame]
Bjorn Terelius48b82792020-05-19 10:57:24 +02001/*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "rtc_tools/rtc_event_log_visualizer/alerts.h"
12
13#include <stdio.h>
14
15#include <algorithm>
16#include <limits>
17#include <map>
18#include <string>
19
20#include "logging/rtc_event_log/rtc_event_processor.h"
21#include "rtc_base/checks.h"
22#include "rtc_base/format_macros.h"
23#include "rtc_base/logging.h"
24#include "rtc_base/numerics/sequence_number_util.h"
25#include "rtc_base/strings/string_builder.h"
26
27namespace webrtc {
28
29void TriageHelper::Print(FILE* file) {
30 fprintf(file, "========== TRIAGE NOTIFICATIONS ==========\n");
31 for (const auto& alert : triage_alerts_) {
32 fprintf(file, "%d %s. First occurence at %3.3lf\n", alert.second.count,
33 alert.second.explanation.c_str(), alert.second.first_occurence);
34 }
35 fprintf(file, "========== END TRIAGE NOTIFICATIONS ==========\n");
36}
37
38void TriageHelper::AnalyzeStreamGaps(const ParsedRtcEventLog& parsed_log,
39 PacketDirection direction) {
40 // With 100 packets/s (~800kbps), false positives would require 10 s without
41 // data.
42 constexpr int64_t kMaxSeqNumJump = 1000;
43 // With a 90 kHz clock, false positives would require 10 s without data.
44 constexpr int64_t kMaxCaptureTimeJump = 900000;
45
46 std::string seq_num_explanation =
47 direction == kIncomingPacket
48 ? "Incoming RTP sequence number jumps more than 1000. Counter may "
49 "have been reset or rewritten incorrectly in a group call."
50 : "Outgoing RTP sequence number jumps more than 1000. Counter may "
51 "have been reset.";
52 std::string capture_time_explanation =
53 direction == kIncomingPacket ? "Incoming capture time jumps more than "
54 "10s. Clock might have been reset."
55 : "Outgoing capture time jumps more than "
56 "10s. Clock might have been reset.";
57 TriageAlertType seq_num_alert = direction == kIncomingPacket
58 ? TriageAlertType::kIncomingSeqNumJump
59 : TriageAlertType::kOutgoingSeqNumJump;
60 TriageAlertType capture_time_alert =
61 direction == kIncomingPacket ? TriageAlertType::kIncomingCaptureTimeJump
62 : TriageAlertType::kOutgoingCaptureTimeJump;
63
64 const int64_t segment_end_us =
65 parsed_log.log_segments().empty()
66 ? std::numeric_limits<int64_t>::max()
67 : parsed_log.log_segments().front().stop_time_us();
68
69 // Check for gaps in sequence numbers and capture timestamps.
70 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
71 if (IsRtxSsrc(parsed_log, direction, stream.ssrc)) {
72 continue;
73 }
74 SeqNumUnwrapper<uint16_t> seq_num_unwrapper;
75 absl::optional<int64_t> last_seq_num;
76 SeqNumUnwrapper<uint32_t> capture_time_unwrapper;
77 absl::optional<int64_t> last_capture_time;
78 for (const auto& packet : stream.packet_view) {
79 if (packet.log_time_us() > segment_end_us) {
80 // Only process the first (LOG_START, LOG_END) segment.
81 break;
82 }
83
84 int64_t seq_num = seq_num_unwrapper.Unwrap(packet.header.sequenceNumber);
85 if (last_seq_num.has_value() &&
86 std::abs(seq_num - last_seq_num.value()) > kMaxSeqNumJump) {
87 Alert(seq_num_alert, config_.GetCallTimeSec(packet.log_time_us()),
88 seq_num_explanation);
89 }
90 last_seq_num.emplace(seq_num);
91
92 int64_t capture_time =
93 capture_time_unwrapper.Unwrap(packet.header.timestamp);
94 if (last_capture_time.has_value() &&
95 std::abs(capture_time - last_capture_time.value()) >
96 kMaxCaptureTimeJump) {
97 Alert(capture_time_alert, config_.GetCallTimeSec(packet.log_time_us()),
98 capture_time_explanation);
99 }
100 last_capture_time.emplace(capture_time);
101 }
102 }
103}
104
105void TriageHelper::AnalyzeTransmissionGaps(const ParsedRtcEventLog& parsed_log,
106 PacketDirection direction) {
107 constexpr int64_t kMaxRtpTransmissionGap = 500000;
108 constexpr int64_t kMaxRtcpTransmissionGap = 2000000;
109 std::string rtp_explanation =
110 direction == kIncomingPacket
111 ? "No RTP packets received for more than 500ms. This indicates a "
112 "network problem. Temporary video freezes and choppy or robotic "
113 "audio is unavoidable. Unnecessary BWE drops is a known issue."
114 : "No RTP packets sent for more than 500 ms. This might be an issue "
115 "with the pacer.";
116 std::string rtcp_explanation =
117 direction == kIncomingPacket
118 ? "No RTCP packets received for more than 2 s. Could be a longer "
119 "connection outage"
120 : "No RTCP sent for more than 2 s. This is most likely a bug.";
121 TriageAlertType rtp_alert = direction == kIncomingPacket
122 ? TriageAlertType::kIncomingRtpGap
123 : TriageAlertType::kOutgoingRtpGap;
124 TriageAlertType rtcp_alert = direction == kIncomingPacket
125 ? TriageAlertType::kIncomingRtcpGap
126 : TriageAlertType::kOutgoingRtcpGap;
127
128 const int64_t segment_end_us =
129 parsed_log.log_segments().empty()
130 ? std::numeric_limits<int64_t>::max()
131 : parsed_log.log_segments().front().stop_time_us();
132
133 // TODO(terelius): The parser could provide a list of all packets, ordered
134 // by time, for each direction.
135 std::multimap<int64_t, const LoggedRtpPacket*> rtp_in_direction;
136 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
137 for (const LoggedRtpPacket& rtp_packet : stream.packet_view)
138 rtp_in_direction.emplace(rtp_packet.log_time_us(), &rtp_packet);
139 }
140 absl::optional<int64_t> last_rtp_time;
141 for (const auto& kv : rtp_in_direction) {
142 int64_t timestamp = kv.first;
143 if (timestamp > segment_end_us) {
144 // Only process the first (LOG_START, LOG_END) segment.
145 break;
146 }
147 int64_t duration = timestamp - last_rtp_time.value_or(0);
148 if (last_rtp_time.has_value() && duration > kMaxRtpTransmissionGap) {
149 // No packet sent/received for more than 500 ms.
150 Alert(rtp_alert, config_.GetCallTimeSec(timestamp), rtp_explanation);
151 }
152 last_rtp_time.emplace(timestamp);
153 }
154
155 absl::optional<int64_t> last_rtcp_time;
156 if (direction == kIncomingPacket) {
157 for (const auto& rtcp : parsed_log.incoming_rtcp_packets()) {
158 if (rtcp.log_time_us() > segment_end_us) {
159 // Only process the first (LOG_START, LOG_END) segment.
160 break;
161 }
162 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
163 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
164 // No feedback sent/received for more than 2000 ms.
165 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time_us()),
166 rtcp_explanation);
167 }
168 last_rtcp_time.emplace(rtcp.log_time_us());
169 }
170 } else {
171 for (const auto& rtcp : parsed_log.outgoing_rtcp_packets()) {
172 if (rtcp.log_time_us() > segment_end_us) {
173 // Only process the first (LOG_START, LOG_END) segment.
174 break;
175 }
176 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
177 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
178 // No feedback sent/received for more than 2000 ms.
179 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time_us()),
180 rtcp_explanation);
181 }
182 last_rtcp_time.emplace(rtcp.log_time_us());
183 }
184 }
185}
186
187// TODO(terelius): Notifications could possibly be generated by the same code
188// that produces the graphs. There is some code duplication that could be
189// avoided, but that might be solved anyway when we move functionality from the
190// analyzer to the parser.
191void TriageHelper::AnalyzeLog(const ParsedRtcEventLog& parsed_log) {
192 AnalyzeStreamGaps(parsed_log, kIncomingPacket);
193 AnalyzeStreamGaps(parsed_log, kOutgoingPacket);
194 AnalyzeTransmissionGaps(parsed_log, kIncomingPacket);
195 AnalyzeTransmissionGaps(parsed_log, kOutgoingPacket);
196
197 const int64_t segment_end_us =
198 parsed_log.log_segments().empty()
199 ? std::numeric_limits<int64_t>::max()
200 : parsed_log.log_segments().front().stop_time_us();
201
202 int64_t first_occurence = parsed_log.last_timestamp();
203 constexpr double kMaxLossFraction = 0.05;
204 // Loss feedback
205 int64_t total_lost_packets = 0;
206 int64_t total_expected_packets = 0;
207 for (auto& bwe_update : parsed_log.bwe_loss_updates()) {
208 if (bwe_update.log_time_us() > segment_end_us) {
209 // Only process the first (LOG_START, LOG_END) segment.
210 break;
211 }
212 int64_t lost_packets = static_cast<double>(bwe_update.fraction_lost) / 255 *
213 bwe_update.expected_packets;
214 total_lost_packets += lost_packets;
215 total_expected_packets += bwe_update.expected_packets;
216 if (bwe_update.fraction_lost >= 255 * kMaxLossFraction) {
217 first_occurence = std::min(first_occurence, bwe_update.log_time_us());
218 }
219 }
220 double avg_outgoing_loss =
221 static_cast<double>(total_lost_packets) / total_expected_packets;
222 if (avg_outgoing_loss > kMaxLossFraction) {
223 Alert(TriageAlertType::kOutgoingHighLoss, first_occurence,
224 "More than 5% of outgoing packets lost.");
225 }
226}
227
228} // namespace webrtc