Add DisassemblerObjdump.

This extracts the existing objdump-based disassembler engine used in ExploitabilityLinux into a separate reusable class, and adds support for most common address operand formats. This is a precursor to using DisassemblerObjdump to handle address resolution for non-canonical address dereferences on amd64. Bug: 901847 Change-Id: I1a06a86fc2e7c76b4d0e79eca5f8a6c501379f47 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3720740 Reviewed-by: Ivan Penkov <ivanpe@google.com> Reviewed-by: Ivan Penkov <ivanpe@chromium.org>
2025-12-28 10:15:10 +01:00 · 2022-10-07 10:43:07 +02:00 · 2022-10-07 10:43:07 +02:00 · 6289830b67
commit 6289830b67
parent bcffe4fe60
13 changed files with 4102 additions and 2282 deletions
--- a/src/processor/exploitability_linux.cc
+++ b/src/processor/exploitability_linux.cc
@ -35,21 +35,13 @@

 #include "processor/exploitability_linux.h"

-#ifndef _WIN32
-#include <regex.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <sstream>
-#include <iterator>
-#endif  // _WIN32
-
 #include <string.h>

 #include "google_breakpad/common/minidump_exception_linux.h"
 #include "google_breakpad/processor/call_stack.h"
 #include "google_breakpad/processor/process_state.h"
 #include "google_breakpad/processor/stack_frame.h"
+#include "processor/disassembler_objdump.h"
 #include "processor/logging.h"

 namespace {
@ -67,11 +59,6 @@ constexpr char kStackCheckFailureFunction[] = "__stack_chk_fail";
 // can determine that the call would overflow the target buffer.
 constexpr char kBoundsCheckFailureFunction[] = "__chk_fail";

-#ifndef _WIN32
-const unsigned int MAX_INSTRUCTION_LEN = 15;
-const unsigned int MAX_OBJDUMP_BUFFER_LEN = 4096;
-#endif  // _WIN32
-
 }  // namespace

 namespace google_breakpad {
@ -198,69 +185,30 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
    BPLOG(INFO) << "No exception or architecture data.";
    return false;
  }
-  // Check architecture and set architecture variable to corresponding flag
-  // in objdump.
-  switch (context->GetContextCPU()) {
-    case MD_CONTEXT_X86:
-      architecture = "i386";
-      break;
-    case MD_CONTEXT_AMD64:
-      architecture = "i386:x86-64";
-      break;
-    default:
-      // Unsupported architecture. Note that ARM architectures are not
-      // supported because objdump does not support ARM.
-      return false;
-  }

-  // Get memory region around instruction pointer and the number of bytes
-  // before and after the instruction pointer in the memory region.
-  const uint8_t* raw_memory = memory_region->GetMemory();
-  const uint64_t base = memory_region->GetBase();
-  if (base > instruction_ptr) {
-    BPLOG(ERROR) << "Memory region base value exceeds instruction pointer.";
-    return false;
-  }
-  const uint64_t offset = instruction_ptr - base;
-  if (memory_region->GetSize() < MAX_INSTRUCTION_LEN + offset) {
-    BPLOG(INFO) << "Not enough bytes left to guarantee complete instruction.";
+  DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
+                                   instruction_ptr);
+  if (!disassembler.IsValid()) {
+    BPLOG(INFO) << "Disassembling fault instruction failed.";
    return false;
  }

-  // Convert bytes into objdump output.
-  char objdump_output_buffer[MAX_OBJDUMP_BUFFER_LEN] = {0};
-  DisassembleBytes(architecture,
-                   raw_memory + offset,
-                   MAX_INSTRUCTION_LEN,
-                   MAX_OBJDUMP_BUFFER_LEN,
-                   objdump_output_buffer);
-
-  string line;
-  if (!GetObjdumpInstructionLine(objdump_output_buffer, &line)) {
-    return false;
-  }
-
-  // Convert objdump instruction line into the operation and operands.
-  string instruction = "";
-  string dest = "";
-  string src = "";
-  TokenizeObjdumpInstruction(line, &instruction, &dest, &src);
-
-  // Check if the operation is a write to memory. First, the instruction
-  // must one that can write to memory. Second, the write destination
-  // must be a spot in memory rather than a register. Since there are no
-  // symbols from objdump, the destination will be enclosed by brackets.
-  if (dest.size() > 2 && dest.at(0) == '[' && dest.at(dest.size() - 1) == ']' &&
-      (!instruction.compare("mov") || !instruction.compare("inc") ||
-       !instruction.compare("dec") || !instruction.compare("and") ||
-       !instruction.compare("or") || !instruction.compare("xor") ||
-       !instruction.compare("not") || !instruction.compare("neg") ||
-       !instruction.compare("add") || !instruction.compare("sub") ||
-       !instruction.compare("shl") || !instruction.compare("shr"))) {
-    // Strip away enclosing brackets from the destination address.
-    dest = dest.substr(1, dest.size() - 2);
+  // Check if the operation is a write to memory.
+  // First, the instruction must be one that can write to memory.
+  auto instruction = disassembler.operation();
+  if (!instruction.compare("mov") || !instruction.compare("inc") ||
+      !instruction.compare("dec") || !instruction.compare("and") ||
+      !instruction.compare("or") || !instruction.compare("xor") ||
+      !instruction.compare("not") || !instruction.compare("neg") ||
+      !instruction.compare("add") || !instruction.compare("sub") ||
+      !instruction.compare("shl") || !instruction.compare("shr")) {
    uint64_t write_address = 0;
-    CalculateAddress(dest, *context, &write_address);
+
+    // Check that the destination is a memory address. CalculateDestAddress will
+    // return false if the destination is not a memory address.
+    if (!disassembler.CalculateDestAddress(*context, write_address)) {
+      return false;
+    }

    // If the program crashed as a result of a write, the destination of
    // the write must have been an address that did not permit writing.
@ -268,271 +216,14 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
    // the crash does not suggest exploitability for writes with such a
    // low target address.
    return write_address > 4096;
+  } else {
+    return false;
  }
+
 #endif  // _WIN32
  return false;
 }

-#ifndef _WIN32
-bool ExploitabilityLinux::CalculateAddress(const string& address_expression,
-                                           const DumpContext& context,
-                                           uint64_t* write_address) {
-  // The destination should be the format reg+a or reg-a, where reg
-  // is a register and a is a hexadecimal constant. Although more complex
-  // expressions can make valid instructions, objdump's disassembly outputs
-  // it in this simpler format.
-  // TODO(liuandrew): Handle more complex formats, should they arise.
-
-  if (!write_address) {
-    BPLOG(ERROR) << "Null parameter.";
-    return false;
-  }
-
-  // Clone parameter into a non-const string.
-  string expression = address_expression;
-
-  // Parse out the constant that is added to the address (if it exists).
-  size_t delim = expression.find('+');
-  bool positive_add_constant = true;
-  // Check if constant is subtracted instead of added.
-  if (delim == string::npos) {
-    positive_add_constant = false;
-    delim = expression.find('-');
-  }
-  uint32_t add_constant = 0;
-  // Save constant and remove it from the expression.
-  if (delim != string::npos) {
-    if (!sscanf(expression.substr(delim + 1).c_str(), "%x", &add_constant)) {
-      BPLOG(ERROR) << "Failed to scan constant.";
-      return false;
-    }
-    expression = expression.substr(0, delim);
-  }
-
-  // Set the the write address to the corresponding register.
-  // TODO(liuandrew): Add support for partial registers, such as
-  // the rax/eax/ax/ah/al chain.
-  switch (context.GetContextCPU()) {
-    case MD_CONTEXT_X86:
-      if (!expression.compare("eax")) {
-        *write_address = context.GetContextX86()->eax;
-      } else if (!expression.compare("ebx")) {
-        *write_address = context.GetContextX86()->ebx;
-      } else if (!expression.compare("ecx")) {
-        *write_address = context.GetContextX86()->ecx;
-      } else if (!expression.compare("edx")) {
-        *write_address = context.GetContextX86()->edx;
-      } else if (!expression.compare("edi")) {
-        *write_address = context.GetContextX86()->edi;
-      } else if (!expression.compare("esi")) {
-        *write_address = context.GetContextX86()->esi;
-      } else if (!expression.compare("ebp")) {
-        *write_address = context.GetContextX86()->ebp;
-      } else if (!expression.compare("esp")) {
-        *write_address = context.GetContextX86()->esp;
-      } else if (!expression.compare("eip")) {
-        *write_address = context.GetContextX86()->eip;
-      } else {
-        BPLOG(ERROR) << "Unsupported register";
-        return false;
-      }
-      break;
-    case MD_CONTEXT_AMD64:
-      if (!expression.compare("rax")) {
-        *write_address = context.GetContextAMD64()->rax;
-      } else if (!expression.compare("rbx")) {
-        *write_address = context.GetContextAMD64()->rbx;
-      } else if (!expression.compare("rcx")) {
-        *write_address = context.GetContextAMD64()->rcx;
-      } else if (!expression.compare("rdx")) {
-        *write_address = context.GetContextAMD64()->rdx;
-      } else if (!expression.compare("rdi")) {
-        *write_address = context.GetContextAMD64()->rdi;
-      } else if (!expression.compare("rsi")) {
-        *write_address = context.GetContextAMD64()->rsi;
-      } else if (!expression.compare("rbp")) {
-        *write_address = context.GetContextAMD64()->rbp;
-      } else if (!expression.compare("rsp")) {
-        *write_address = context.GetContextAMD64()->rsp;
-      } else if (!expression.compare("rip")) {
-        *write_address = context.GetContextAMD64()->rip;
-      } else if (!expression.compare("r8")) {
-        *write_address = context.GetContextAMD64()->r8;
-      } else if (!expression.compare("r9")) {
-        *write_address = context.GetContextAMD64()->r9;
-      } else if (!expression.compare("r10")) {
-        *write_address = context.GetContextAMD64()->r10;
-      } else if (!expression.compare("r11")) {
-        *write_address = context.GetContextAMD64()->r11;
-      } else if (!expression.compare("r12")) {
-        *write_address = context.GetContextAMD64()->r12;
-      } else if (!expression.compare("r13")) {
-        *write_address = context.GetContextAMD64()->r13;
-      } else if (!expression.compare("r14")) {
-        *write_address = context.GetContextAMD64()->r14;
-      } else if (!expression.compare("r15")) {
-        *write_address = context.GetContextAMD64()->r15;
-      } else {
-        BPLOG(ERROR) << "Unsupported register";
-        return false;
-      }
-      break;
-    default:
-      // This should not occur since the same switch condition
-      // should have terminated this method.
-      return false;
-  }
-
-  // Add or subtract constant from write address (if applicable).
-  *write_address =
-      positive_add_constant ?
-      *write_address + add_constant : *write_address - add_constant;
-
-  return true;
-}
-
-// static
-bool ExploitabilityLinux::GetObjdumpInstructionLine(
-    const char* objdump_output_buffer,
-    string* instruction_line) {
-  // Put buffer data into stream to output line-by-line.
-  std::stringstream objdump_stream;
-  objdump_stream.str(string(objdump_output_buffer));
-
-  // Pipe each output line into the string until the string contains the first
-  // instruction from objdump.  All lines before the "<.data>:" section are
-  // skipped.  Loop until the line shows the first instruction or there are no
-  // lines left.
-  bool data_section_seen = false;
-  do {
-    if (!getline(objdump_stream, *instruction_line)) {
-      BPLOG(INFO) << "Objdump instructions not found";
-      return false;
-    }
-    if (instruction_line->find("<.data>:") != string::npos) {
-      data_section_seen = true;
-    }
-  } while (!data_section_seen || instruction_line->find("0:") == string::npos);
-  // This first instruction contains the above substring.
-
-  return true;
-}
-
-bool ExploitabilityLinux::TokenizeObjdumpInstruction(const string& line,
-                                                     string* operation,
-                                                     string* dest,
-                                                     string* src) {
-  if (!operation || !dest || !src) {
-    BPLOG(ERROR) << "Null parameters passed.";
-    return false;
-  }
-
-  // Set all pointer values to empty strings.
-  *operation = "";
-  *dest = "";
-  *src = "";
-
-  // Tokenize the objdump line.
-  vector<string> tokens;
-  std::istringstream line_stream(line);
-  copy(std::istream_iterator<string>(line_stream),
-       std::istream_iterator<string>(),
-       std::back_inserter(tokens));
-
-  // Regex for the data in hex form. Each byte is two hex digits.
-  regex_t regex;
-  regcomp(&regex, "^[[:xdigit:]]{2}$", REG_EXTENDED | REG_NOSUB);
-
-  // Find and set the location of the operator. The operator appears
-  // directly after the chain of bytes that define the instruction. The
-  // operands will be the last token, given that the instruction has operands.
-  // If not, the operator is the last token. The loop skips the first token
-  // because the first token is the instruction number (namely "0:").
-  string operands = "";
-  for (size_t i = 1; i < tokens.size(); i++) {
-    // Check if current token no longer is in byte format.
-    if (regexec(&regex, tokens[i].c_str(), 0, NULL, 0)) {
-      // instruction = tokens[i];
-      *operation = tokens[i];
-      // If the operator is the last token, there are no operands.
-      if (i != tokens.size() - 1) {
-        operands = tokens[tokens.size() - 1];
-      }
-      break;
-    }
-  }
-  regfree(&regex);
-
-  if (operation->empty()) {
-    BPLOG(ERROR) << "Failed to parse out operation from objdump instruction.";
-    return false;
-  }
-
-  // Split operands into source and destination (if applicable).
-  if (!operands.empty()) {
-    size_t delim = operands.find(',');
-    if (delim == string::npos) {
-      *dest = operands;
-    } else {
-      *dest = operands.substr(0, delim);
-      *src = operands.substr(delim + 1);
-    }
-  }
-  return true;
-}
-
-bool ExploitabilityLinux::DisassembleBytes(const string& architecture,
-                                           const uint8_t* raw_bytes,
-                                           const unsigned int raw_bytes_len,
-                                           const unsigned int buffer_len,
-                                           char* objdump_output_buffer) {
-  if (!raw_bytes || !objdump_output_buffer ||
-      raw_bytes_len > MAX_INSTRUCTION_LEN) {
-    BPLOG(ERROR) << "Bad input parameters.";
-    return false;
-  }
-
-  // Write raw bytes around instruction pointer to a temporary file to
-  // pass as an argument to objdump.
-  char raw_bytes_tmpfile[] = "/tmp/breakpad_mem_region-raw_bytes-XXXXXX";
-  int raw_bytes_fd = mkstemp(raw_bytes_tmpfile);
-  if (raw_bytes_fd < 0) {
-    BPLOG(ERROR) << "Failed to create tempfile.";
-    unlink(raw_bytes_tmpfile);
-    return false;
-  }
-  // Casting raw_bytes_len to `ssize_t` won't cause a sign flip, since we check
-  // its bounds above.
-  if (write(raw_bytes_fd, raw_bytes, raw_bytes_len) != (ssize_t)raw_bytes_len) {
-    BPLOG(ERROR) << "Writing of raw bytes failed.";
-    unlink(raw_bytes_tmpfile);
-    return false;
-  }
-
-  char cmd[1024] = {0};
-  snprintf(cmd,
-           1024,
-           "objdump -D -b binary -M intel -m %s %s",
-           architecture.c_str(),
-           raw_bytes_tmpfile);
-  FILE* objdump_fp = popen(cmd, "r");
-  if (!objdump_fp) {
-    unlink(raw_bytes_tmpfile);
-    BPLOG(ERROR) << "Failed to call objdump.";
-    return false;
-  }
-  if (fread(objdump_output_buffer, 1, buffer_len, objdump_fp) <= 0) {
-    pclose(objdump_fp);
-    unlink(raw_bytes_tmpfile);
-    BPLOG(ERROR) << "Failed to read objdump output.";
-    return false;
-  }
-  pclose(objdump_fp);
-  unlink(raw_bytes_tmpfile);
-  return true;
-}
-#endif  // _WIN32
-
 bool ExploitabilityLinux::StackPointerOffStack(uint64_t stack_ptr) {
  MinidumpLinuxMapsList* linux_maps_list = dump_->GetLinuxMapsList();
  // Inconclusive if there are no mappings available.