blob: c0d8784fa82bc1a1d0b366c2f22dcfe52a460e45 [file] [log] [blame]
Bjorn Terelius48b82792020-05-19 10:57:24 +02001/*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "rtc_tools/rtc_event_log_visualizer/alerts.h"
12
13#include <stdio.h>
14
15#include <algorithm>
16#include <limits>
17#include <map>
18#include <string>
19
20#include "logging/rtc_event_log/rtc_event_processor.h"
21#include "rtc_base/checks.h"
22#include "rtc_base/format_macros.h"
23#include "rtc_base/logging.h"
24#include "rtc_base/numerics/sequence_number_util.h"
25#include "rtc_base/strings/string_builder.h"
26
27namespace webrtc {
28
Bjorn Terelius48b82792020-05-19 10:57:24 +020029void TriageHelper::AnalyzeStreamGaps(const ParsedRtcEventLog& parsed_log,
30 PacketDirection direction) {
31 // With 100 packets/s (~800kbps), false positives would require 10 s without
32 // data.
33 constexpr int64_t kMaxSeqNumJump = 1000;
34 // With a 90 kHz clock, false positives would require 10 s without data.
Bjorn Tereliusb9148192020-05-19 14:30:35 +020035 constexpr int64_t kTicksPerMillisec = 90;
36 constexpr int64_t kCaptureTimeGraceMs = 10000;
Bjorn Terelius48b82792020-05-19 10:57:24 +020037
38 std::string seq_num_explanation =
39 direction == kIncomingPacket
40 ? "Incoming RTP sequence number jumps more than 1000. Counter may "
41 "have been reset or rewritten incorrectly in a group call."
42 : "Outgoing RTP sequence number jumps more than 1000. Counter may "
43 "have been reset.";
44 std::string capture_time_explanation =
45 direction == kIncomingPacket ? "Incoming capture time jumps more than "
46 "10s. Clock might have been reset."
47 : "Outgoing capture time jumps more than "
48 "10s. Clock might have been reset.";
49 TriageAlertType seq_num_alert = direction == kIncomingPacket
50 ? TriageAlertType::kIncomingSeqNumJump
51 : TriageAlertType::kOutgoingSeqNumJump;
52 TriageAlertType capture_time_alert =
53 direction == kIncomingPacket ? TriageAlertType::kIncomingCaptureTimeJump
54 : TriageAlertType::kOutgoingCaptureTimeJump;
55
Björn Terelius0d1b28c2020-05-27 20:25:06 +020056 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
Bjorn Terelius48b82792020-05-19 10:57:24 +020057
58 // Check for gaps in sequence numbers and capture timestamps.
59 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
60 if (IsRtxSsrc(parsed_log, direction, stream.ssrc)) {
61 continue;
62 }
Bjorn Tereliusb9148192020-05-19 14:30:35 +020063 auto packets = stream.packet_view;
64 if (packets.empty()) {
65 continue;
66 }
Bjorn Terelius48b82792020-05-19 10:57:24 +020067 SeqNumUnwrapper<uint16_t> seq_num_unwrapper;
Bjorn Tereliusb9148192020-05-19 14:30:35 +020068 int64_t last_seq_num =
69 seq_num_unwrapper.Unwrap(packets[0].header.sequenceNumber);
Bjorn Terelius48b82792020-05-19 10:57:24 +020070 SeqNumUnwrapper<uint32_t> capture_time_unwrapper;
Bjorn Tereliusb9148192020-05-19 14:30:35 +020071 int64_t last_capture_time =
72 capture_time_unwrapper.Unwrap(packets[0].header.timestamp);
73 int64_t last_log_time_ms = packets[0].log_time_ms();
74 for (const auto& packet : packets) {
Bjorn Terelius48b82792020-05-19 10:57:24 +020075 if (packet.log_time_us() > segment_end_us) {
76 // Only process the first (LOG_START, LOG_END) segment.
77 break;
78 }
79
80 int64_t seq_num = seq_num_unwrapper.Unwrap(packet.header.sequenceNumber);
Bjorn Tereliusb9148192020-05-19 14:30:35 +020081 if (std::abs(seq_num - last_seq_num) > kMaxSeqNumJump) {
Björn Tereliuscb241582022-02-25 08:22:38 +000082 Alert(seq_num_alert, config_.GetCallTimeSec(packet.log_time()),
Bjorn Terelius48b82792020-05-19 10:57:24 +020083 seq_num_explanation);
84 }
Bjorn Tereliusb9148192020-05-19 14:30:35 +020085 last_seq_num = seq_num;
Bjorn Terelius48b82792020-05-19 10:57:24 +020086
87 int64_t capture_time =
88 capture_time_unwrapper.Unwrap(packet.header.timestamp);
Bjorn Tereliusb9148192020-05-19 14:30:35 +020089 if (std::abs(capture_time - last_capture_time) >
90 kTicksPerMillisec *
91 (kCaptureTimeGraceMs + packet.log_time_ms() - last_log_time_ms)) {
Björn Tereliuscb241582022-02-25 08:22:38 +000092 Alert(capture_time_alert, config_.GetCallTimeSec(packet.log_time()),
Bjorn Terelius48b82792020-05-19 10:57:24 +020093 capture_time_explanation);
94 }
Bjorn Tereliusb9148192020-05-19 14:30:35 +020095 last_capture_time = capture_time;
Bjorn Terelius48b82792020-05-19 10:57:24 +020096 }
97 }
98}
99
100void TriageHelper::AnalyzeTransmissionGaps(const ParsedRtcEventLog& parsed_log,
101 PacketDirection direction) {
102 constexpr int64_t kMaxRtpTransmissionGap = 500000;
Bjorn Tereliusb9148192020-05-19 14:30:35 +0200103 constexpr int64_t kMaxRtcpTransmissionGap = 3000000;
Bjorn Terelius48b82792020-05-19 10:57:24 +0200104 std::string rtp_explanation =
105 direction == kIncomingPacket
106 ? "No RTP packets received for more than 500ms. This indicates a "
107 "network problem. Temporary video freezes and choppy or robotic "
108 "audio is unavoidable. Unnecessary BWE drops is a known issue."
109 : "No RTP packets sent for more than 500 ms. This might be an issue "
110 "with the pacer.";
111 std::string rtcp_explanation =
112 direction == kIncomingPacket
Bjorn Tereliusb9148192020-05-19 14:30:35 +0200113 ? "No RTCP packets received for more than 3 s. Could be a longer "
Bjorn Terelius48b82792020-05-19 10:57:24 +0200114 "connection outage"
Bjorn Tereliusb9148192020-05-19 14:30:35 +0200115 : "No RTCP packets sent for more than 3 s. This is most likely a "
116 "bug.";
Bjorn Terelius48b82792020-05-19 10:57:24 +0200117 TriageAlertType rtp_alert = direction == kIncomingPacket
118 ? TriageAlertType::kIncomingRtpGap
119 : TriageAlertType::kOutgoingRtpGap;
120 TriageAlertType rtcp_alert = direction == kIncomingPacket
121 ? TriageAlertType::kIncomingRtcpGap
122 : TriageAlertType::kOutgoingRtcpGap;
123
Björn Terelius0d1b28c2020-05-27 20:25:06 +0200124 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
Bjorn Terelius48b82792020-05-19 10:57:24 +0200125
126 // TODO(terelius): The parser could provide a list of all packets, ordered
127 // by time, for each direction.
128 std::multimap<int64_t, const LoggedRtpPacket*> rtp_in_direction;
129 for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) {
130 for (const LoggedRtpPacket& rtp_packet : stream.packet_view)
131 rtp_in_direction.emplace(rtp_packet.log_time_us(), &rtp_packet);
132 }
133 absl::optional<int64_t> last_rtp_time;
134 for (const auto& kv : rtp_in_direction) {
135 int64_t timestamp = kv.first;
136 if (timestamp > segment_end_us) {
137 // Only process the first (LOG_START, LOG_END) segment.
138 break;
139 }
140 int64_t duration = timestamp - last_rtp_time.value_or(0);
141 if (last_rtp_time.has_value() && duration > kMaxRtpTransmissionGap) {
142 // No packet sent/received for more than 500 ms.
Björn Tereliuscb241582022-02-25 08:22:38 +0000143 Alert(rtp_alert, config_.GetCallTimeSec(Timestamp::Micros(timestamp)),
144 rtp_explanation);
Bjorn Terelius48b82792020-05-19 10:57:24 +0200145 }
146 last_rtp_time.emplace(timestamp);
147 }
148
149 absl::optional<int64_t> last_rtcp_time;
150 if (direction == kIncomingPacket) {
151 for (const auto& rtcp : parsed_log.incoming_rtcp_packets()) {
152 if (rtcp.log_time_us() > segment_end_us) {
153 // Only process the first (LOG_START, LOG_END) segment.
154 break;
155 }
156 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
157 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
158 // No feedback sent/received for more than 2000 ms.
Björn Tereliuscb241582022-02-25 08:22:38 +0000159 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()),
Bjorn Terelius48b82792020-05-19 10:57:24 +0200160 rtcp_explanation);
161 }
162 last_rtcp_time.emplace(rtcp.log_time_us());
163 }
164 } else {
165 for (const auto& rtcp : parsed_log.outgoing_rtcp_packets()) {
166 if (rtcp.log_time_us() > segment_end_us) {
167 // Only process the first (LOG_START, LOG_END) segment.
168 break;
169 }
170 int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0);
171 if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) {
172 // No feedback sent/received for more than 2000 ms.
Björn Tereliuscb241582022-02-25 08:22:38 +0000173 Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()),
Bjorn Terelius48b82792020-05-19 10:57:24 +0200174 rtcp_explanation);
175 }
176 last_rtcp_time.emplace(rtcp.log_time_us());
177 }
178 }
179}
180
181// TODO(terelius): Notifications could possibly be generated by the same code
182// that produces the graphs. There is some code duplication that could be
183// avoided, but that might be solved anyway when we move functionality from the
184// analyzer to the parser.
185void TriageHelper::AnalyzeLog(const ParsedRtcEventLog& parsed_log) {
186 AnalyzeStreamGaps(parsed_log, kIncomingPacket);
187 AnalyzeStreamGaps(parsed_log, kOutgoingPacket);
188 AnalyzeTransmissionGaps(parsed_log, kIncomingPacket);
189 AnalyzeTransmissionGaps(parsed_log, kOutgoingPacket);
190
Björn Terelius0d1b28c2020-05-27 20:25:06 +0200191 const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us();
Bjorn Terelius48b82792020-05-19 10:57:24 +0200192
Björn Tereliuscb241582022-02-25 08:22:38 +0000193 Timestamp first_occurrence = parsed_log.last_timestamp();
Bjorn Terelius48b82792020-05-19 10:57:24 +0200194 constexpr double kMaxLossFraction = 0.05;
195 // Loss feedback
196 int64_t total_lost_packets = 0;
197 int64_t total_expected_packets = 0;
198 for (auto& bwe_update : parsed_log.bwe_loss_updates()) {
199 if (bwe_update.log_time_us() > segment_end_us) {
200 // Only process the first (LOG_START, LOG_END) segment.
201 break;
202 }
203 int64_t lost_packets = static_cast<double>(bwe_update.fraction_lost) / 255 *
204 bwe_update.expected_packets;
205 total_lost_packets += lost_packets;
206 total_expected_packets += bwe_update.expected_packets;
207 if (bwe_update.fraction_lost >= 255 * kMaxLossFraction) {
Björn Tereliuscb241582022-02-25 08:22:38 +0000208 first_occurrence = std::min(first_occurrence, bwe_update.log_time());
Bjorn Terelius48b82792020-05-19 10:57:24 +0200209 }
210 }
211 double avg_outgoing_loss =
212 static_cast<double>(total_lost_packets) / total_expected_packets;
213 if (avg_outgoing_loss > kMaxLossFraction) {
Björn Tereliuscb241582022-02-25 08:22:38 +0000214 Alert(TriageAlertType::kOutgoingHighLoss,
215 config_.GetCallTimeSec(first_occurrence),
Bjorn Terelius48b82792020-05-19 10:57:24 +0200216 "More than 5% of outgoing packets lost.");
217 }
218}
219
Bjorn Terelius7634ea72020-10-02 14:53:50 +0200220void TriageHelper::Print(FILE* file) {
221 fprintf(file, "========== TRIAGE NOTIFICATIONS ==========\n");
222 for (const auto& alert : triage_alerts_) {
223 fprintf(file, "%d %s. First occurrence at %3.3lf\n", alert.second.count,
224 alert.second.explanation.c_str(), alert.second.first_occurrence);
225 }
226 fprintf(file, "========== END TRIAGE NOTIFICATIONS ==========\n");
227}
228
229void TriageHelper::ProcessAlerts(
230 std::function<void(int, float, std::string)> f) {
231 for (const auto& alert : triage_alerts_) {
232 f(alert.second.count, alert.second.first_occurrence,
233 alert.second.explanation);
234 }
235}
236
Bjorn Terelius48b82792020-05-19 10:57:24 +0200237} // namespace webrtc