Kimiyuki Onaka | b767cb1 | 2021-01-22 15:00:22 +0900 | [diff] [blame^] | 1 | // Copyright 2021 The Chromium OS Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "crash-reporter/kernel_util.h" |
| 6 | |
| 7 | #include <algorithm> |
| 8 | |
| 9 | #include <base/logging.h> |
| 10 | #include <base/stl_util.h> |
| 11 | #include <base/strings/string_number_conversions.h> |
| 12 | #include <base/strings/string_util.h> |
| 13 | #include <base/strings/stringprintf.h> |
| 14 | #include <re2/re2.h> |
| 15 | |
| 16 | #include "crash-reporter/util.h" |
| 17 | |
| 18 | using base::StringPiece; |
| 19 | using base::StringPrintf; |
| 20 | |
| 21 | namespace { |
| 22 | |
| 23 | constexpr char kDefaultKernelStackSignature[] = |
| 24 | "kernel-UnspecifiedStackSignature"; |
| 25 | |
| 26 | // Byte length of maximum human readable portion of a kernel crash signature. |
| 27 | constexpr size_t kMaxHumanStringLength = 40; |
| 28 | // Time in seconds from the final kernel log message for a call stack |
| 29 | // to count towards the signature of the kcrash. |
| 30 | constexpr int kSignatureTimestampWindow = 2; |
| 31 | // Kernel log timestamp regular expression. |
| 32 | // Specify the multiline option so that ^ matches the start of lines, not just |
| 33 | // the start of the text. |
| 34 | constexpr char kTimestampRegex[] = "(?m)^<.*>\\[\\s*(\\d+\\.\\d+)\\]"; |
| 35 | |
| 36 | // |
| 37 | // These regular expressions enable to us capture the function name of |
| 38 | // the PC in a backtrace. |
| 39 | // The backtrace is obtained through dmesg or the kernel's preserved/kcrashmem |
| 40 | // feature. |
| 41 | // |
| 42 | // For ARM we see: |
| 43 | // "<5>[ 39.458982] PC is at write_breakme+0xd0/0x1b4" (arm32) |
| 44 | // "<4>[ 263.857834] pc : lkdtm_BUG+0xc/0x10" (arm64) |
| 45 | // For MIPS we see: |
| 46 | // "<5>[ 3378.552000] epc : 804010f0 lkdtm_do_action+0x68/0x3f8" |
| 47 | // For x86: |
| 48 | // "<0>[ 37.474699] EIP: [<790ed488>] write_breakme+0x80/0x108 |
| 49 | // SS:ESP 0068:e9dd3efc" |
| 50 | // For x86_64: |
| 51 | // "<5>[ 1505.853254] RIP: 0010:[<ffffffff94fb0c27>] [<ffffffff94fb0c27>] |
| 52 | // list_del_init+0x8/0x1b" (v4.10-) |
| 53 | // "<4>[ 2358.194253] RIP: 0010:pick_task_fair+0x55/0x77" (v4.10+) |
| 54 | // |
| 55 | const char* const kPCFuncNameRegex[] = { |
| 56 | nullptr, R"( (?:PC is at |pc : )([^\+\[ ]+).*)", |
| 57 | R"( epc\s+:\s+\S+\s+([^\+ ]+).*)", // MIPS has an exception |
| 58 | // program counter |
| 59 | R"( EIP: \[<.*>\] ([^\+ ]+).*)", // X86 uses EIP for the |
| 60 | // program counter |
| 61 | R"( RIP: [[:xdigit:]]{4}:(?:\[<[[:xdigit:]]+>\] \[<[[:xdigit:]]+>\] )?)" |
| 62 | R"(([^\+ ]+)\+0x.*)", // X86_64 uses RIP |
| 63 | }; |
| 64 | |
| 65 | static_assert(base::size(kPCFuncNameRegex) == kernel_util::kArchCount, |
| 66 | "Missing Arch PC func_name RegExp"); |
| 67 | |
| 68 | void ProcessStackTrace(re2::StringPiece kernel_dump, |
| 69 | unsigned* hash, |
| 70 | float* last_stack_timestamp, |
| 71 | bool* is_watchdog_crash) { |
| 72 | RE2 line_re("(.+)"); |
| 73 | |
| 74 | RE2::Options opt; |
| 75 | opt.set_case_sensitive(false); |
| 76 | RE2 stack_trace_start_re( |
| 77 | std::string(kTimestampRegex) + " (Call Trace|Backtrace):$", opt); |
| 78 | |
| 79 | // Match lines such as the following and grab out "function_name". |
| 80 | // The ? may or may not be present. |
| 81 | // |
| 82 | // For ARM: |
| 83 | // <4>[ 3498.731164] [<c0057220>] ? (function_name+0x20/0x2c) from |
| 84 | // [<c018062c>] (foo_bar+0xdc/0x1bc) (arm32 older) |
| 85 | // <4>[ 263.956936] lkdtm_do_action+0x24/0x40 (arm64 / arm32 newer) |
| 86 | // |
| 87 | // For MIPS: |
| 88 | // <5>[ 3378.656000] [<804010f0>] lkdtm_do_action+0x68/0x3f8 |
| 89 | // |
| 90 | // For X86: |
| 91 | // <4>[ 6066.849504] [<7937bcee>] ? function_name+0x66/0x6c |
| 92 | // <4>[ 2358.194379] __schedule+0x83f/0xf92 (newer) like arm64 above |
| 93 | // |
| 94 | RE2 stack_entry_re( |
| 95 | std::string(kTimestampRegex) + |
| 96 | R"(\s+(?:\[<[[:xdigit:]]+>\])?)" // Matches " [<7937bcee>]" (if any) |
| 97 | R"(([\s?(]+))" // Matches " ? (" (ARM) or " ? " (X86) |
| 98 | R"(([^\+ )]+))"); // Matches until delimiter reached |
| 99 | std::string line; |
| 100 | std::string hashable; |
| 101 | std::string previous_hashable; |
| 102 | bool is_watchdog = false; |
| 103 | |
| 104 | *hash = 0; |
| 105 | *last_stack_timestamp = 0; |
| 106 | |
| 107 | // Find the last and second-to-last stack traces. The latter is used when |
| 108 | // the panic is from a watchdog timeout. |
| 109 | while (RE2::FindAndConsume(&kernel_dump, line_re, &line)) { |
| 110 | std::string certainty; |
| 111 | std::string function_name; |
| 112 | if (RE2::PartialMatch(line, stack_trace_start_re, last_stack_timestamp)) { |
| 113 | previous_hashable = hashable; |
| 114 | hashable.clear(); |
| 115 | is_watchdog = false; |
| 116 | } else if (RE2::PartialMatch(line, stack_entry_re, last_stack_timestamp, |
| 117 | &certainty, &function_name)) { |
| 118 | bool is_certain = certainty.find('?') == std::string::npos; |
| 119 | // Do not include any uncertain (prefixed by '?') frames in our hash. |
| 120 | if (!is_certain) |
| 121 | continue; |
| 122 | if (!hashable.empty()) |
| 123 | hashable.append("|"); |
| 124 | if (function_name == "watchdog_timer_fn" || function_name == "watchdog") { |
| 125 | is_watchdog = true; |
| 126 | } |
| 127 | hashable.append(function_name); |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | // If the last stack trace contains a watchdog function we assume the panic |
| 132 | // is from the watchdog timer, and we hash the previous stack trace rather |
| 133 | // than the last one, assuming that the previous stack is that of the hung |
| 134 | // thread. |
| 135 | // |
| 136 | // In addition, if the hashable is empty (meaning all frames are uncertain, |
| 137 | // for whatever reason) also use the previous frame, as it cannot be any |
| 138 | // worse. |
| 139 | if (is_watchdog || hashable.empty()) { |
| 140 | hashable = previous_hashable; |
| 141 | } |
| 142 | |
| 143 | *hash = util::HashString(StringPiece(hashable)); |
| 144 | *is_watchdog_crash = is_watchdog; |
| 145 | } |
| 146 | |
| 147 | bool FindCrashingFunction(re2::StringPiece kernel_dump, |
| 148 | float stack_trace_timestamp, |
| 149 | kernel_util::ArchKind arch, |
| 150 | std::string* crashing_function) { |
| 151 | float timestamp = 0; |
| 152 | |
| 153 | // Use the correct regex for this architecture. |
| 154 | if (kPCFuncNameRegex[arch] == nullptr) { |
| 155 | LOG(WARNING) << "PC func_name RegExp is not defined for this architecture"; |
| 156 | return false; |
| 157 | } |
| 158 | RE2 func_re(std::string(kTimestampRegex) + kPCFuncNameRegex[arch]); |
| 159 | |
| 160 | while (RE2::FindAndConsume(&kernel_dump, func_re, ×tamp, |
| 161 | crashing_function)) { |
| 162 | } |
| 163 | if (timestamp == 0) { |
| 164 | LOG(WARNING) << "Found no crashing function"; |
| 165 | return false; |
| 166 | } |
| 167 | if (stack_trace_timestamp != 0 && |
| 168 | abs(static_cast<int>(stack_trace_timestamp - timestamp)) > |
| 169 | kSignatureTimestampWindow) { |
| 170 | LOG(WARNING) << "Found crashing function but not within window"; |
| 171 | return false; |
| 172 | } |
| 173 | return true; |
| 174 | } |
| 175 | |
| 176 | bool FindPanicMessage(re2::StringPiece kernel_dump, |
| 177 | std::string* panic_message) { |
| 178 | // Match lines such as the following and grab out "Fatal exception" |
| 179 | // <0>[ 342.841135] Kernel panic - not syncing: Fatal exception |
| 180 | RE2 kernel_panic_re(std::string(kTimestampRegex) + |
| 181 | " Kernel panic[^\\:]*\\:\\s*(.*)"); |
| 182 | float timestamp = 0; |
| 183 | while (RE2::FindAndConsume(&kernel_dump, kernel_panic_re, ×tamp, |
| 184 | panic_message)) { |
| 185 | } |
| 186 | if (timestamp == 0) { |
| 187 | LOG(WARNING) << "Found no panic message"; |
| 188 | return false; |
| 189 | } |
| 190 | return true; |
| 191 | } |
| 192 | |
| 193 | } // namespace |
| 194 | |
| 195 | namespace kernel_util { |
| 196 | |
| 197 | const char kKernelExecName[] = "kernel"; |
| 198 | |
| 199 | ArchKind GetCompilerArch() { |
| 200 | #if defined(COMPILER_GCC) && defined(ARCH_CPU_ARM_FAMILY) |
| 201 | return kArchArm; |
| 202 | #elif defined(COMPILER_GCC) && defined(ARCH_CPU_MIPS_FAMILY) |
| 203 | return kArchMips; |
| 204 | #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_64) |
| 205 | return kArchX86_64; |
| 206 | #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY) |
| 207 | return kArchX86; |
| 208 | #else |
| 209 | return kArchUnknown; |
| 210 | #endif |
| 211 | } |
| 212 | |
| 213 | std::string ComputeKernelStackSignature(const std::string& kernel_dump, |
| 214 | ArchKind arch) { |
| 215 | unsigned stack_hash = 0; |
| 216 | float last_stack_timestamp = 0; |
| 217 | std::string human_string; |
| 218 | bool is_watchdog_crash; |
| 219 | |
| 220 | ProcessStackTrace(kernel_dump, &stack_hash, &last_stack_timestamp, |
| 221 | &is_watchdog_crash); |
| 222 | |
| 223 | if (!FindCrashingFunction(kernel_dump, last_stack_timestamp, arch, |
| 224 | &human_string)) { |
| 225 | if (!FindPanicMessage(kernel_dump, &human_string)) { |
| 226 | LOG(WARNING) << "Found no human readable string, using empty string"; |
| 227 | human_string.clear(); |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | if (human_string.empty() && stack_hash == 0) { |
| 232 | LOG(WARNING) << "Cannot find a stack or a human readable string"; |
| 233 | return kDefaultKernelStackSignature; |
| 234 | } |
| 235 | |
| 236 | human_string = human_string.substr(0, kMaxHumanStringLength); |
| 237 | return StringPrintf("%s-%s%s-%08X", kKernelExecName, |
| 238 | (is_watchdog_crash ? "(HANG)-" : ""), |
| 239 | human_string.c_str(), stack_hash); |
| 240 | } |
| 241 | |
| 242 | std::string BiosCrashSignature(const std::string& dump) { |
| 243 | const char* type = ""; |
| 244 | |
| 245 | if (RE2::PartialMatch(dump, RE2("PANIC in EL3"))) |
| 246 | type = "PANIC"; |
| 247 | else if (RE2::PartialMatch(dump, RE2("Unhandled Exception in EL3"))) |
| 248 | type = "EXCPT"; |
| 249 | else if (RE2::PartialMatch(dump, RE2("Unhandled Interrupt Exception in"))) |
| 250 | type = "INTR"; |
| 251 | |
| 252 | std::string elr; |
| 253 | RE2::PartialMatch(dump, RE2("x30 =\\s+(0x[0-9a-fA-F]+)"), &elr); |
| 254 | |
| 255 | return StringPrintf("bios-(%s)-%s", type, elr.c_str()); |
| 256 | } |
| 257 | |
| 258 | // Watchdog reboots leave no stack trace. Generate a poor man's signature out |
| 259 | // of the last log line instead (minus the timestamp ended by ']'). |
| 260 | std::string WatchdogSignature(const std::string& console_ramoops) { |
| 261 | StringPiece line(console_ramoops); |
| 262 | constexpr char kTimestampEnd[] = "] "; |
| 263 | size_t timestamp_end_pos = line.rfind(kTimestampEnd); |
| 264 | if (timestamp_end_pos != StringPiece::npos) { |
| 265 | line = line.substr(timestamp_end_pos + strlen(kTimestampEnd)); |
| 266 | } |
| 267 | size_t newline_pos = line.find("\n"); |
| 268 | size_t end = (newline_pos == StringPiece::npos |
| 269 | ? StringPiece::npos |
| 270 | : std::min(newline_pos, kMaxHumanStringLength)); |
| 271 | return StringPrintf("%s-(WATCHDOG)-%s-%08X", kKernelExecName, |
| 272 | line.substr(0, end).as_string().c_str(), |
| 273 | util::HashString(line)); |
| 274 | } |
| 275 | |
| 276 | } // namespace kernel_util |