Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include "rtc_tools/rtc_event_log_visualizer/alerts.h" |
| 12 | |
| 13 | #include <stdio.h> |
| 14 | |
| 15 | #include <algorithm> |
| 16 | #include <limits> |
| 17 | #include <map> |
| 18 | #include <string> |
| 19 | |
| 20 | #include "logging/rtc_event_log/rtc_event_processor.h" |
| 21 | #include "rtc_base/checks.h" |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 22 | #include "rtc_base/logging.h" |
| 23 | #include "rtc_base/numerics/sequence_number_util.h" |
| 24 | #include "rtc_base/strings/string_builder.h" |
| 25 | |
| 26 | namespace webrtc { |
| 27 | |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 28 | void TriageHelper::AnalyzeStreamGaps(const ParsedRtcEventLog& parsed_log, |
| 29 | PacketDirection direction) { |
| 30 | // With 100 packets/s (~800kbps), false positives would require 10 s without |
| 31 | // data. |
| 32 | constexpr int64_t kMaxSeqNumJump = 1000; |
| 33 | // With a 90 kHz clock, false positives would require 10 s without data. |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 34 | constexpr int64_t kTicksPerMillisec = 90; |
| 35 | constexpr int64_t kCaptureTimeGraceMs = 10000; |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 36 | |
| 37 | std::string seq_num_explanation = |
| 38 | direction == kIncomingPacket |
| 39 | ? "Incoming RTP sequence number jumps more than 1000. Counter may " |
| 40 | "have been reset or rewritten incorrectly in a group call." |
| 41 | : "Outgoing RTP sequence number jumps more than 1000. Counter may " |
| 42 | "have been reset."; |
| 43 | std::string capture_time_explanation = |
| 44 | direction == kIncomingPacket ? "Incoming capture time jumps more than " |
| 45 | "10s. Clock might have been reset." |
| 46 | : "Outgoing capture time jumps more than " |
| 47 | "10s. Clock might have been reset."; |
| 48 | TriageAlertType seq_num_alert = direction == kIncomingPacket |
| 49 | ? TriageAlertType::kIncomingSeqNumJump |
| 50 | : TriageAlertType::kOutgoingSeqNumJump; |
| 51 | TriageAlertType capture_time_alert = |
| 52 | direction == kIncomingPacket ? TriageAlertType::kIncomingCaptureTimeJump |
| 53 | : TriageAlertType::kOutgoingCaptureTimeJump; |
| 54 | |
Björn Terelius | 0d1b28c | 2020-05-27 20:25:06 +0200 | [diff] [blame] | 55 | const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us(); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 56 | |
| 57 | // Check for gaps in sequence numbers and capture timestamps. |
| 58 | for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) { |
| 59 | if (IsRtxSsrc(parsed_log, direction, stream.ssrc)) { |
| 60 | continue; |
| 61 | } |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 62 | auto packets = stream.packet_view; |
| 63 | if (packets.empty()) { |
| 64 | continue; |
| 65 | } |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 66 | SeqNumUnwrapper<uint16_t> seq_num_unwrapper; |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 67 | int64_t last_seq_num = |
| 68 | seq_num_unwrapper.Unwrap(packets[0].header.sequenceNumber); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 69 | SeqNumUnwrapper<uint32_t> capture_time_unwrapper; |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 70 | int64_t last_capture_time = |
| 71 | capture_time_unwrapper.Unwrap(packets[0].header.timestamp); |
| 72 | int64_t last_log_time_ms = packets[0].log_time_ms(); |
| 73 | for (const auto& packet : packets) { |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 74 | if (packet.log_time_us() > segment_end_us) { |
| 75 | // Only process the first (LOG_START, LOG_END) segment. |
| 76 | break; |
| 77 | } |
| 78 | |
| 79 | int64_t seq_num = seq_num_unwrapper.Unwrap(packet.header.sequenceNumber); |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 80 | if (std::abs(seq_num - last_seq_num) > kMaxSeqNumJump) { |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 81 | Alert(seq_num_alert, config_.GetCallTimeSec(packet.log_time()), |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 82 | seq_num_explanation); |
| 83 | } |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 84 | last_seq_num = seq_num; |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 85 | |
| 86 | int64_t capture_time = |
| 87 | capture_time_unwrapper.Unwrap(packet.header.timestamp); |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 88 | if (std::abs(capture_time - last_capture_time) > |
| 89 | kTicksPerMillisec * |
| 90 | (kCaptureTimeGraceMs + packet.log_time_ms() - last_log_time_ms)) { |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 91 | Alert(capture_time_alert, config_.GetCallTimeSec(packet.log_time()), |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 92 | capture_time_explanation); |
| 93 | } |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 94 | last_capture_time = capture_time; |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 95 | } |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | void TriageHelper::AnalyzeTransmissionGaps(const ParsedRtcEventLog& parsed_log, |
| 100 | PacketDirection direction) { |
| 101 | constexpr int64_t kMaxRtpTransmissionGap = 500000; |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 102 | constexpr int64_t kMaxRtcpTransmissionGap = 3000000; |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 103 | std::string rtp_explanation = |
| 104 | direction == kIncomingPacket |
| 105 | ? "No RTP packets received for more than 500ms. This indicates a " |
| 106 | "network problem. Temporary video freezes and choppy or robotic " |
| 107 | "audio is unavoidable. Unnecessary BWE drops is a known issue." |
| 108 | : "No RTP packets sent for more than 500 ms. This might be an issue " |
| 109 | "with the pacer."; |
| 110 | std::string rtcp_explanation = |
| 111 | direction == kIncomingPacket |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 112 | ? "No RTCP packets received for more than 3 s. Could be a longer " |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 113 | "connection outage" |
Bjorn Terelius | b914819 | 2020-05-19 14:30:35 +0200 | [diff] [blame] | 114 | : "No RTCP packets sent for more than 3 s. This is most likely a " |
| 115 | "bug."; |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 116 | TriageAlertType rtp_alert = direction == kIncomingPacket |
| 117 | ? TriageAlertType::kIncomingRtpGap |
| 118 | : TriageAlertType::kOutgoingRtpGap; |
| 119 | TriageAlertType rtcp_alert = direction == kIncomingPacket |
| 120 | ? TriageAlertType::kIncomingRtcpGap |
| 121 | : TriageAlertType::kOutgoingRtcpGap; |
| 122 | |
Björn Terelius | 0d1b28c | 2020-05-27 20:25:06 +0200 | [diff] [blame] | 123 | const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us(); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 124 | |
| 125 | // TODO(terelius): The parser could provide a list of all packets, ordered |
| 126 | // by time, for each direction. |
| 127 | std::multimap<int64_t, const LoggedRtpPacket*> rtp_in_direction; |
| 128 | for (const auto& stream : parsed_log.rtp_packets_by_ssrc(direction)) { |
| 129 | for (const LoggedRtpPacket& rtp_packet : stream.packet_view) |
| 130 | rtp_in_direction.emplace(rtp_packet.log_time_us(), &rtp_packet); |
| 131 | } |
| 132 | absl::optional<int64_t> last_rtp_time; |
| 133 | for (const auto& kv : rtp_in_direction) { |
| 134 | int64_t timestamp = kv.first; |
| 135 | if (timestamp > segment_end_us) { |
| 136 | // Only process the first (LOG_START, LOG_END) segment. |
| 137 | break; |
| 138 | } |
| 139 | int64_t duration = timestamp - last_rtp_time.value_or(0); |
| 140 | if (last_rtp_time.has_value() && duration > kMaxRtpTransmissionGap) { |
| 141 | // No packet sent/received for more than 500 ms. |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 142 | Alert(rtp_alert, config_.GetCallTimeSec(Timestamp::Micros(timestamp)), |
| 143 | rtp_explanation); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 144 | } |
| 145 | last_rtp_time.emplace(timestamp); |
| 146 | } |
| 147 | |
| 148 | absl::optional<int64_t> last_rtcp_time; |
| 149 | if (direction == kIncomingPacket) { |
| 150 | for (const auto& rtcp : parsed_log.incoming_rtcp_packets()) { |
| 151 | if (rtcp.log_time_us() > segment_end_us) { |
| 152 | // Only process the first (LOG_START, LOG_END) segment. |
| 153 | break; |
| 154 | } |
| 155 | int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0); |
| 156 | if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) { |
| 157 | // No feedback sent/received for more than 2000 ms. |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 158 | Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()), |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 159 | rtcp_explanation); |
| 160 | } |
| 161 | last_rtcp_time.emplace(rtcp.log_time_us()); |
| 162 | } |
| 163 | } else { |
| 164 | for (const auto& rtcp : parsed_log.outgoing_rtcp_packets()) { |
| 165 | if (rtcp.log_time_us() > segment_end_us) { |
| 166 | // Only process the first (LOG_START, LOG_END) segment. |
| 167 | break; |
| 168 | } |
| 169 | int64_t duration = rtcp.log_time_us() - last_rtcp_time.value_or(0); |
| 170 | if (last_rtcp_time.has_value() && duration > kMaxRtcpTransmissionGap) { |
| 171 | // No feedback sent/received for more than 2000 ms. |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 172 | Alert(rtcp_alert, config_.GetCallTimeSec(rtcp.log_time()), |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 173 | rtcp_explanation); |
| 174 | } |
| 175 | last_rtcp_time.emplace(rtcp.log_time_us()); |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | // TODO(terelius): Notifications could possibly be generated by the same code |
| 181 | // that produces the graphs. There is some code duplication that could be |
| 182 | // avoided, but that might be solved anyway when we move functionality from the |
| 183 | // analyzer to the parser. |
| 184 | void TriageHelper::AnalyzeLog(const ParsedRtcEventLog& parsed_log) { |
| 185 | AnalyzeStreamGaps(parsed_log, kIncomingPacket); |
| 186 | AnalyzeStreamGaps(parsed_log, kOutgoingPacket); |
| 187 | AnalyzeTransmissionGaps(parsed_log, kIncomingPacket); |
| 188 | AnalyzeTransmissionGaps(parsed_log, kOutgoingPacket); |
| 189 | |
Björn Terelius | 0d1b28c | 2020-05-27 20:25:06 +0200 | [diff] [blame] | 190 | const int64_t segment_end_us = parsed_log.first_log_segment().stop_time_us(); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 191 | |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 192 | Timestamp first_occurrence = parsed_log.last_timestamp(); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 193 | constexpr double kMaxLossFraction = 0.05; |
| 194 | // Loss feedback |
| 195 | int64_t total_lost_packets = 0; |
| 196 | int64_t total_expected_packets = 0; |
| 197 | for (auto& bwe_update : parsed_log.bwe_loss_updates()) { |
| 198 | if (bwe_update.log_time_us() > segment_end_us) { |
| 199 | // Only process the first (LOG_START, LOG_END) segment. |
| 200 | break; |
| 201 | } |
| 202 | int64_t lost_packets = static_cast<double>(bwe_update.fraction_lost) / 255 * |
| 203 | bwe_update.expected_packets; |
| 204 | total_lost_packets += lost_packets; |
| 205 | total_expected_packets += bwe_update.expected_packets; |
| 206 | if (bwe_update.fraction_lost >= 255 * kMaxLossFraction) { |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 207 | first_occurrence = std::min(first_occurrence, bwe_update.log_time()); |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 208 | } |
| 209 | } |
| 210 | double avg_outgoing_loss = |
| 211 | static_cast<double>(total_lost_packets) / total_expected_packets; |
| 212 | if (avg_outgoing_loss > kMaxLossFraction) { |
Björn Terelius | cb24158 | 2022-02-25 08:22:38 +0000 | [diff] [blame] | 213 | Alert(TriageAlertType::kOutgoingHighLoss, |
| 214 | config_.GetCallTimeSec(first_occurrence), |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 215 | "More than 5% of outgoing packets lost."); |
| 216 | } |
| 217 | } |
| 218 | |
Bjorn Terelius | 7634ea7 | 2020-10-02 14:53:50 +0200 | [diff] [blame] | 219 | void TriageHelper::Print(FILE* file) { |
| 220 | fprintf(file, "========== TRIAGE NOTIFICATIONS ==========\n"); |
| 221 | for (const auto& alert : triage_alerts_) { |
| 222 | fprintf(file, "%d %s. First occurrence at %3.3lf\n", alert.second.count, |
| 223 | alert.second.explanation.c_str(), alert.second.first_occurrence); |
| 224 | } |
| 225 | fprintf(file, "========== END TRIAGE NOTIFICATIONS ==========\n"); |
| 226 | } |
| 227 | |
| 228 | void TriageHelper::ProcessAlerts( |
| 229 | std::function<void(int, float, std::string)> f) { |
| 230 | for (const auto& alert : triage_alerts_) { |
| 231 | f(alert.second.count, alert.second.first_occurrence, |
| 232 | alert.second.explanation); |
| 233 | } |
| 234 | } |
| 235 | |
Bjorn Terelius | 48b8279 | 2020-05-19 10:57:24 +0200 | [diff] [blame] | 236 | } // namespace webrtc |