Add DisassemblerObjdump.

This extracts the existing objdump-based disassembler engine used in
ExploitabilityLinux into a separate reusable class, and adds support
for most common address operand formats.

This is a precursor to using DisassemblerObjdump to handle address
resolution for non-canonical address dereferences on amd64.

Bug: 901847
Change-Id: I1a06a86fc2e7c76b4d0e79eca5f8a6c501379f47
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3720740
Reviewed-by: Ivan Penkov <ivanpe@google.com>
Reviewed-by: Ivan Penkov <ivanpe@chromium.org>
This commit is contained in:
Mark Brand 2022-10-07 10:43:07 +02:00 committed by Ivan Penkov
parent bcffe4fe60
commit 6289830b67
13 changed files with 4102 additions and 2282 deletions

View file

@ -35,21 +35,13 @@
#include "processor/exploitability_linux.h"
#ifndef _WIN32
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sstream>
#include <iterator>
#endif // _WIN32
#include <string.h>
#include "google_breakpad/common/minidump_exception_linux.h"
#include "google_breakpad/processor/call_stack.h"
#include "google_breakpad/processor/process_state.h"
#include "google_breakpad/processor/stack_frame.h"
#include "processor/disassembler_objdump.h"
#include "processor/logging.h"
namespace {
@ -67,11 +59,6 @@ constexpr char kStackCheckFailureFunction[] = "__stack_chk_fail";
// can determine that the call would overflow the target buffer.
constexpr char kBoundsCheckFailureFunction[] = "__chk_fail";
#ifndef _WIN32
const unsigned int MAX_INSTRUCTION_LEN = 15;
const unsigned int MAX_OBJDUMP_BUFFER_LEN = 4096;
#endif // _WIN32
} // namespace
namespace google_breakpad {
@ -198,69 +185,30 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
BPLOG(INFO) << "No exception or architecture data.";
return false;
}
// Check architecture and set architecture variable to corresponding flag
// in objdump.
switch (context->GetContextCPU()) {
case MD_CONTEXT_X86:
architecture = "i386";
break;
case MD_CONTEXT_AMD64:
architecture = "i386:x86-64";
break;
default:
// Unsupported architecture. Note that ARM architectures are not
// supported because objdump does not support ARM.
return false;
}
// Get memory region around instruction pointer and the number of bytes
// before and after the instruction pointer in the memory region.
const uint8_t* raw_memory = memory_region->GetMemory();
const uint64_t base = memory_region->GetBase();
if (base > instruction_ptr) {
BPLOG(ERROR) << "Memory region base value exceeds instruction pointer.";
return false;
}
const uint64_t offset = instruction_ptr - base;
if (memory_region->GetSize() < MAX_INSTRUCTION_LEN + offset) {
BPLOG(INFO) << "Not enough bytes left to guarantee complete instruction.";
DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
instruction_ptr);
if (!disassembler.IsValid()) {
BPLOG(INFO) << "Disassembling fault instruction failed.";
return false;
}
// Convert bytes into objdump output.
char objdump_output_buffer[MAX_OBJDUMP_BUFFER_LEN] = {0};
DisassembleBytes(architecture,
raw_memory + offset,
MAX_INSTRUCTION_LEN,
MAX_OBJDUMP_BUFFER_LEN,
objdump_output_buffer);
string line;
if (!GetObjdumpInstructionLine(objdump_output_buffer, &line)) {
return false;
}
// Convert objdump instruction line into the operation and operands.
string instruction = "";
string dest = "";
string src = "";
TokenizeObjdumpInstruction(line, &instruction, &dest, &src);
// Check if the operation is a write to memory. First, the instruction
// must be one that can write to memory. Second, the write destination
// must be a spot in memory rather than a register. Since there are no
// symbols from objdump, the destination will be enclosed by brackets.
if (dest.size() > 2 && dest.at(0) == '[' && dest.at(dest.size() - 1) == ']' &&
(!instruction.compare("mov") || !instruction.compare("inc") ||
!instruction.compare("dec") || !instruction.compare("and") ||
!instruction.compare("or") || !instruction.compare("xor") ||
!instruction.compare("not") || !instruction.compare("neg") ||
!instruction.compare("add") || !instruction.compare("sub") ||
!instruction.compare("shl") || !instruction.compare("shr"))) {
// Strip away enclosing brackets from the destination address.
dest = dest.substr(1, dest.size() - 2);
// Check if the operation is a write to memory.
// First, the instruction must be one that can write to memory.
auto instruction = disassembler.operation();
if (!instruction.compare("mov") || !instruction.compare("inc") ||
!instruction.compare("dec") || !instruction.compare("and") ||
!instruction.compare("or") || !instruction.compare("xor") ||
!instruction.compare("not") || !instruction.compare("neg") ||
!instruction.compare("add") || !instruction.compare("sub") ||
!instruction.compare("shl") || !instruction.compare("shr")) {
uint64_t write_address = 0;
CalculateAddress(dest, *context, &write_address);
// Check that the destination is a memory address. CalculateDestAddress will
// return false if the destination is not a memory address.
if (!disassembler.CalculateDestAddress(*context, write_address)) {
return false;
}
// If the program crashed as a result of a write, the destination of
// the write must have been an address that did not permit writing.
@ -268,271 +216,14 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
// the crash does not suggest exploitability for writes with such a
// low target address.
return write_address > 4096;
} else {
return false;
}
#endif // _WIN32
return false;
}
#ifndef _WIN32
bool ExploitabilityLinux::CalculateAddress(const string& address_expression,
const DumpContext& context,
uint64_t* write_address) {
// The destination should be the format reg+a or reg-a, where reg
// is a register and a is a hexadecimal constant. Although more complex
// expressions can make valid instructions, objdump's disassembly outputs
// it in this simpler format.
// TODO(liuandrew): Handle more complex formats, should they arise.
if (!write_address) {
BPLOG(ERROR) << "Null parameter.";
return false;
}
// Clone parameter into a non-const string.
string expression = address_expression;
// Parse out the constant that is added to the address (if it exists).
size_t delim = expression.find('+');
bool positive_add_constant = true;
// Check if constant is subtracted instead of added.
if (delim == string::npos) {
positive_add_constant = false;
delim = expression.find('-');
}
uint32_t add_constant = 0;
// Save constant and remove it from the expression.
if (delim != string::npos) {
if (!sscanf(expression.substr(delim + 1).c_str(), "%x", &add_constant)) {
BPLOG(ERROR) << "Failed to scan constant.";
return false;
}
expression = expression.substr(0, delim);
}
// Set the the write address to the corresponding register.
// TODO(liuandrew): Add support for partial registers, such as
// the rax/eax/ax/ah/al chain.
switch (context.GetContextCPU()) {
case MD_CONTEXT_X86:
if (!expression.compare("eax")) {
*write_address = context.GetContextX86()->eax;
} else if (!expression.compare("ebx")) {
*write_address = context.GetContextX86()->ebx;
} else if (!expression.compare("ecx")) {
*write_address = context.GetContextX86()->ecx;
} else if (!expression.compare("edx")) {
*write_address = context.GetContextX86()->edx;
} else if (!expression.compare("edi")) {
*write_address = context.GetContextX86()->edi;
} else if (!expression.compare("esi")) {
*write_address = context.GetContextX86()->esi;
} else if (!expression.compare("ebp")) {
*write_address = context.GetContextX86()->ebp;
} else if (!expression.compare("esp")) {
*write_address = context.GetContextX86()->esp;
} else if (!expression.compare("eip")) {
*write_address = context.GetContextX86()->eip;
} else {
BPLOG(ERROR) << "Unsupported register";
return false;
}
break;
case MD_CONTEXT_AMD64:
if (!expression.compare("rax")) {
*write_address = context.GetContextAMD64()->rax;
} else if (!expression.compare("rbx")) {
*write_address = context.GetContextAMD64()->rbx;
} else if (!expression.compare("rcx")) {
*write_address = context.GetContextAMD64()->rcx;
} else if (!expression.compare("rdx")) {
*write_address = context.GetContextAMD64()->rdx;
} else if (!expression.compare("rdi")) {
*write_address = context.GetContextAMD64()->rdi;
} else if (!expression.compare("rsi")) {
*write_address = context.GetContextAMD64()->rsi;
} else if (!expression.compare("rbp")) {
*write_address = context.GetContextAMD64()->rbp;
} else if (!expression.compare("rsp")) {
*write_address = context.GetContextAMD64()->rsp;
} else if (!expression.compare("rip")) {
*write_address = context.GetContextAMD64()->rip;
} else if (!expression.compare("r8")) {
*write_address = context.GetContextAMD64()->r8;
} else if (!expression.compare("r9")) {
*write_address = context.GetContextAMD64()->r9;
} else if (!expression.compare("r10")) {
*write_address = context.GetContextAMD64()->r10;
} else if (!expression.compare("r11")) {
*write_address = context.GetContextAMD64()->r11;
} else if (!expression.compare("r12")) {
*write_address = context.GetContextAMD64()->r12;
} else if (!expression.compare("r13")) {
*write_address = context.GetContextAMD64()->r13;
} else if (!expression.compare("r14")) {
*write_address = context.GetContextAMD64()->r14;
} else if (!expression.compare("r15")) {
*write_address = context.GetContextAMD64()->r15;
} else {
BPLOG(ERROR) << "Unsupported register";
return false;
}
break;
default:
// This should not occur since the same switch condition
// should have terminated this method.
return false;
}
// Add or subtract constant from write address (if applicable).
*write_address =
positive_add_constant ?
*write_address + add_constant : *write_address - add_constant;
return true;
}
// static
// Scans the objdump output held in |objdump_output_buffer| line by line and
// leaves the first instruction line in |*instruction_line|.
//
// Every line up to the "<.data>:" section header is ignored. Once the header
// has been seen, the first line containing "0:" is taken to be the first
// instruction. Returns true when such a line is found and false when the
// output runs out first.
bool ExploitabilityLinux::GetObjdumpInstructionLine(
    const char* objdump_output_buffer,
    string* instruction_line) {
  // Wrap the buffer in a stream so it can be consumed one line at a time.
  std::stringstream output_lines;
  output_lines.str(string(objdump_output_buffer));

  bool in_data_section = false;
  while (getline(output_lines, *instruction_line)) {
    if (instruction_line->find("<.data>:") != string::npos) {
      in_data_section = true;
    }
    // The first instruction is identified by its "0:" offset label.
    if (in_data_section && instruction_line->find("0:") != string::npos) {
      return true;
    }
  }

  BPLOG(INFO) << "Objdump instructions not found";
  return false;
}
bool ExploitabilityLinux::TokenizeObjdumpInstruction(const string& line,
string* operation,
string* dest,
string* src) {
if (!operation || !dest || !src) {
BPLOG(ERROR) << "Null parameters passed.";
return false;
}
// Set all pointer values to empty strings.
*operation = "";
*dest = "";
*src = "";
// Tokenize the objdump line.
vector<string> tokens;
std::istringstream line_stream(line);
copy(std::istream_iterator<string>(line_stream),
std::istream_iterator<string>(),
std::back_inserter(tokens));
// Regex for the data in hex form. Each byte is two hex digits.
regex_t regex;
regcomp(&regex, "^[[:xdigit:]]{2}$", REG_EXTENDED | REG_NOSUB);
// Find and set the location of the operator. The operator appears
// directly after the chain of bytes that define the instruction. The
// operands will be the last token, given that the instruction has operands.
// If not, the operator is the last token. The loop skips the first token
// because the first token is the instruction number (namely "0:").
string operands = "";
for (size_t i = 1; i < tokens.size(); i++) {
// Check if current token no longer is in byte format.
if (regexec(&regex, tokens[i].c_str(), 0, NULL, 0)) {
// instruction = tokens[i];
*operation = tokens[i];
// If the operator is the last token, there are no operands.
if (i != tokens.size() - 1) {
operands = tokens[tokens.size() - 1];
}
break;
}
}
regfree(&regex);
if (operation->empty()) {
BPLOG(ERROR) << "Failed to parse out operation from objdump instruction.";
return false;
}
// Split operands into source and destination (if applicable).
if (!operands.empty()) {
size_t delim = operands.find(',');
if (delim == string::npos) {
*dest = operands;
} else {
*dest = operands.substr(0, delim);
*src = operands.substr(delim + 1);
}
}
return true;
}
// Disassembles |raw_bytes| by writing them to a temporary file and running
// objdump on that file, capturing objdump's standard output.
//
// |architecture| is passed to objdump's -m option. |raw_bytes_len| must not
// exceed MAX_INSTRUCTION_LEN. At most |buffer_len| - 1 bytes of output are
// stored in |objdump_output_buffer|, which is always NUL-terminated on
// success so that it can be consumed as a C string.
//
// Returns true if some objdump output was captured, false on bad parameters
// or if any step (temp file creation, write, popen, read) fails. The temporary
// file is removed on every path after it has been created.
bool ExploitabilityLinux::DisassembleBytes(const string& architecture,
                                           const uint8_t* raw_bytes,
                                           const unsigned int raw_bytes_len,
                                           const unsigned int buffer_len,
                                           char* objdump_output_buffer) {
  if (!raw_bytes || !objdump_output_buffer || buffer_len == 0 ||
      raw_bytes_len > MAX_INSTRUCTION_LEN) {
    BPLOG(ERROR) << "Bad input parameters.";
    return false;
  }

  // Write raw bytes around instruction pointer to a temporary file to
  // pass as an argument to objdump.
  char raw_bytes_tmpfile[] = "/tmp/breakpad_mem_region-raw_bytes-XXXXXX";
  const int raw_bytes_fd = mkstemp(raw_bytes_tmpfile);
  if (raw_bytes_fd < 0) {
    // No file was created, so there is nothing to unlink here.
    BPLOG(ERROR) << "Failed to create tempfile.";
    return false;
  }

  // Casting raw_bytes_len to `ssize_t` won't cause a sign flip, since we check
  // its bounds above.
  const ssize_t bytes_written = write(raw_bytes_fd, raw_bytes, raw_bytes_len);
  // objdump opens the file by name, so the descriptor is no longer needed.
  // Close it on both the success and failure paths to avoid leaking it.
  close(raw_bytes_fd);
  if (bytes_written != static_cast<ssize_t>(raw_bytes_len)) {
    BPLOG(ERROR) << "Writing of raw bytes failed.";
    unlink(raw_bytes_tmpfile);
    return false;
  }

  char cmd[1024] = {0};
  snprintf(cmd,
           sizeof(cmd),
           "objdump -D -b binary -M intel -m %s %s",
           architecture.c_str(),
           raw_bytes_tmpfile);
  FILE* objdump_fp = popen(cmd, "r");
  if (!objdump_fp) {
    unlink(raw_bytes_tmpfile);
    BPLOG(ERROR) << "Failed to call objdump.";
    return false;
  }

  // Reserve the final byte for the terminator so the output is a valid C
  // string even when objdump produces more than fits in the buffer.
  const size_t bytes_read =
      fread(objdump_output_buffer, 1, buffer_len - 1, objdump_fp);
  pclose(objdump_fp);
  unlink(raw_bytes_tmpfile);
  if (bytes_read == 0) {
    BPLOG(ERROR) << "Failed to read objdump output.";
    return false;
  }
  objdump_output_buffer[bytes_read] = '\0';
  return true;
}
#endif // _WIN32
bool ExploitabilityLinux::StackPointerOffStack(uint64_t stack_ptr) {
MinidumpLinuxMapsList* linux_maps_list = dump_->GetLinuxMapsList();
// Inconclusive if there are no mappings available.