Add DisassemblerObjdump.

This extracts the existing objdump-based disassembler engine used in
ExploitabilityLinux into a separate reusable class, and adds support
for most common address operand formats.

This is a precursor to using DisassemblerObjdump to handle address
resolution for non-canonical address dereferences on amd64.

Bug: 901847
Change-Id: I1a06a86fc2e7c76b4d0e79eca5f8a6c501379f47
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/3720740
Reviewed-by: Ivan Penkov <ivanpe@google.com>
Reviewed-by: Ivan Penkov <ivanpe@chromium.org>
This commit is contained in:
Mark Brand 2022-10-07 10:43:07 +02:00 committed by Ivan Penkov
parent bcffe4fe60
commit 6289830b67
13 changed files with 4102 additions and 2282 deletions

View file

@ -18,18 +18,24 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `rustc_demangle' library (-lrustc_demangle). */
#undef HAVE_LIBRUSTC_DEMANGLE
/* Define to 1 if you have the `memfd_create' function. */
#undef HAVE_MEMFD_CREATE
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Define to 1 if you have the <rustc_demangle.h> header file. */
#undef HAVE_RUSTC_DEMANGLE_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdio.h> header file. */
#undef HAVE_STDIO_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
@ -82,17 +88,14 @@
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* Define to 1 if you have the ANSI C header files. */
/* Define to 1 if all of the C90 standard headers exist (not just the ones
required in a freestanding environment). This macro is provided for
backward compatibility; new code need not use it. */
#undef STDC_HEADERS
/* Version number of package */
#undef VERSION
/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif
/* Number of bits in a file offset, on hosts where this is settable. */
#undef _FILE_OFFSET_BITS

View file

@ -0,0 +1,520 @@
// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
//
// Author: Mark Brand
#include "processor/disassembler_objdump.h"
#ifdef __linux__
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <vector>
#include "processor/logging.h"
namespace google_breakpad {
namespace {
// Upper bound on the number of bytes read from memory for one instruction.
constexpr size_t kMaxX86InstructionLength = 15;
// Small RAII wrapper for temporary files.
//
// The file is only created once Create() is called. If creation succeeded,
// the destructor closes the descriptor and unlinks the file.
//
// Example:
//   ScopedTmpFile tmp("/tmp/tmpfile-XXXXXX");
//   if (tmp.Create()) {
//     std::cerr << tmp.path() << std::endl;
//   }
class ScopedTmpFile {
 public:
  // Initialize the ScopedTmpFile object - this does not create the temporary
  // file yet. `path_format` is handed to mkstemp() by Create(), so it has to
  // be a valid mkstemp() template (ending in "XXXXXX").
  explicit ScopedTmpFile(const char* path_format);
  ~ScopedTmpFile();

  // The object owns a file descriptor; a copy would close and unlink the same
  // file twice.
  ScopedTmpFile(const ScopedTmpFile&) = delete;
  ScopedTmpFile& operator=(const ScopedTmpFile&) = delete;

  // Creates the temporary file, returns true on success. Calling it again
  // after a successful creation is a no-op that returns true.
  bool Create();

  // Writes bytes to the temporary file, returns true on success. Fails if the
  // file has not been created.
  bool Write(const uint8_t* bytes, unsigned int bytes_len);

  // Returns the path of the temporary file. Before Create() this is the
  // template passed to the constructor; after a failed Create() it is empty.
  string path() const { return path_; }

 private:
  // -1 means "no file". 0 cannot be used as the sentinel because it is a
  // valid descriptor value.
  int fd_ = -1;
  string path_;
};

ScopedTmpFile::ScopedTmpFile(const char* path_format) : path_(path_format) {}

ScopedTmpFile::~ScopedTmpFile() {
  if (fd_ >= 0) {
    close(fd_);
    unlink(path_.c_str());
  }
}

bool ScopedTmpFile::Create() {
  if (fd_ >= 0) {
    // Already created. Running mkstemp() again would leak the descriptor.
    return true;
  }
  fd_ = mkstemp(path_.data());
  if (fd_ < 0) {
    // Nothing was created, so there is nothing to unlink.
    fd_ = -1;
    path_ = "";
    return false;
  }
  return true;
}

bool ScopedTmpFile::Write(const uint8_t* bytes, unsigned int bytes_len) {
  if (fd_ < 0) {
    return false;
  }
  // write() may accept fewer bytes than requested, so keep going until the
  // whole buffer has been written or an error occurs.
  while (bytes_len > 0) {
    const ssize_t written = write(fd_, bytes, bytes_len);
    if (written <= 0) {
      return false;
    }
    bytes += written;
    bytes_len -= static_cast<unsigned int>(written);
  }
  return true;
}
// Returns true if `token` is one of the instruction prefixes recognised here
// ("lock", "rep", "repz", "repnz"). The comparison is exact and
// case-sensitive.
bool IsInstructionPrefix(const string& token) {
  const char* const kKnownPrefixes[] = {"lock", "rep", "repz", "repnz"};
  for (const char* prefix : kKnownPrefixes) {
    if (token == prefix) {
      return true;
    }
  }
  return false;
}
// Returns true if `token` is part of an Intel-syntax operand size annotation
// such as "DWORD PTR", i.e. a size keyword or the "PTR" keyword itself. These
// tokens are skipped when extracting operands.
//
// Besides the integer sizes this also accepts the wider keywords (TBYTE,
// FWORD, XMMWORD, YMMWORD, ZMMWORD). Without them the keyword itself would be
// taken as the operand, and the memory operand that follows would be dropped.
bool IsOperandSize(const string& token) {
  const char* const kSizeKeywords[] = {
      "BYTE",  "WORD",  "DWORD",   "QWORD",   "PTR",
      "TBYTE", "FWORD", "XMMWORD", "YMMWORD", "ZMMWORD",
  };
  for (const char* keyword : kSizeKeywords) {
    if (token == keyword) {
      return true;
    }
  }
  return false;
}
// Stores the value of the x86 segment register `segment_name` ("ds", "es",
// "fs" or "gs") from `context` into `address`. For any other name an error is
// logged, `address` is left untouched and false is returned.
bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
                          uint64_t& address) {
  // The raw context is only fetched once a supported name has been matched.
  const auto raw = [&context]() { return context.GetContextX86(); };

  bool recognized = true;
  if (segment_name == "ds") {
    address = raw()->ds;
  } else if (segment_name == "es") {
    address = raw()->es;
  } else if (segment_name == "fs") {
    address = raw()->fs;
  } else if (segment_name == "gs") {
    address = raw()->gs;
  } else {
    recognized = false;
  }

  if (!recognized) {
    BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
  }
  return recognized;
}
// AMD64 counterpart of the segment lookup: "ds" and "es" both resolve to an
// address of 0. Any other segment name is reported as unsupported, in which
// case `address` is left untouched and false is returned. `context` is not
// consulted.
bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
                            uint64_t& address) {
  const bool resolves_to_zero = segment_name == "ds" || segment_name == "es";
  if (!resolves_to_zero) {
    BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
    return false;
  }
  address = 0;
  return true;
}
// Resolves the segment register `segment_name` to an address by dispatching
// on the CPU type of `context`. Only x86 and AMD64 contexts are handled; any
// other CPU logs an error and returns false.
bool GetSegmentAddress(const DumpContext& context, string segment_name,
                       uint64_t& address) {
  if (context.GetContextCPU() == MD_CONTEXT_X86) {
    return GetSegmentAddressX86(context, segment_name, address);
  }
  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
    return GetSegmentAddressAMD64(context, segment_name, address);
  }
  BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
  return false;
}
// Stores the value of the 32-bit x86 register `register_name` from `context`
// into `value`. Supported names are eax, ebx, ecx, edx, edi, esi, ebp, esp
// and eip. For any other name an error is logged, `value` is left untouched
// and false is returned.
bool GetRegisterValueX86(const DumpContext& context, string register_name,
                         uint64_t& value) {
  // The raw context is only fetched once a supported name has been matched.
  const auto raw = [&context]() { return context.GetContextX86(); };

  bool recognized = true;
  if (register_name == "eax") {
    value = raw()->eax;
  } else if (register_name == "ebx") {
    value = raw()->ebx;
  } else if (register_name == "ecx") {
    value = raw()->ecx;
  } else if (register_name == "edx") {
    value = raw()->edx;
  } else if (register_name == "edi") {
    value = raw()->edi;
  } else if (register_name == "esi") {
    value = raw()->esi;
  } else if (register_name == "ebp") {
    value = raw()->ebp;
  } else if (register_name == "esp") {
    value = raw()->esp;
  } else if (register_name == "eip") {
    value = raw()->eip;
  } else {
    recognized = false;
  }

  if (!recognized) {
    BPLOG(ERROR) << "Unsupported register: " << register_name;
  }
  return recognized;
}
// Stores the value of the 64-bit AMD64 register `register_name` from
// `context` into `value`. Supported names are rax, rbx, rcx, rdx, rdi, rsi,
// rbp, rsp, r8-r15 and rip. For any other name an error is logged, `value` is
// left untouched and false is returned.
bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
                           uint64_t& value) {
  // The raw context is only fetched once a supported name has been matched.
  const auto raw = [&context]() { return context.GetContextAMD64(); };

  bool recognized = true;
  if (register_name == "rax") {
    value = raw()->rax;
  } else if (register_name == "rbx") {
    value = raw()->rbx;
  } else if (register_name == "rcx") {
    value = raw()->rcx;
  } else if (register_name == "rdx") {
    value = raw()->rdx;
  } else if (register_name == "rdi") {
    value = raw()->rdi;
  } else if (register_name == "rsi") {
    value = raw()->rsi;
  } else if (register_name == "rbp") {
    value = raw()->rbp;
  } else if (register_name == "rsp") {
    value = raw()->rsp;
  } else if (register_name == "r8") {
    value = raw()->r8;
  } else if (register_name == "r9") {
    value = raw()->r9;
  } else if (register_name == "r10") {
    value = raw()->r10;
  } else if (register_name == "r11") {
    value = raw()->r11;
  } else if (register_name == "r12") {
    value = raw()->r12;
  } else if (register_name == "r13") {
    value = raw()->r13;
  } else if (register_name == "r14") {
    value = raw()->r14;
  } else if (register_name == "r15") {
    value = raw()->r15;
  } else if (register_name == "rip") {
    value = raw()->rip;
  } else {
    recognized = false;
  }

  if (!recognized) {
    BPLOG(ERROR) << "Unsupported register: " << register_name;
  }
  return recognized;
}
// Reads the register `register_name` out of `context` and stores it in
// `value`, returning true on success. The lookup is delegated to the x86 or
// AMD64 helper depending on the CPU type of `context`; other CPU types log an
// error and fail.
// Only full-size registers are supported, since this is only used to evaluate
// address expressions.
bool GetRegisterValue(const DumpContext& context, string register_name,
                      uint64_t& value) {
  if (context.GetContextCPU() == MD_CONTEXT_X86) {
    return GetRegisterValueX86(context, register_name, value);
  }
  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
    return GetRegisterValueAMD64(context, register_name, value);
  }
  BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
  return false;
}
} // namespace
// static
// Disassembles the first instruction found in `raw_bytes` by writing the
// bytes to a temporary file, running objdump on it via system(), and scanning
// the redirected output for the first instruction line. On success the
// instruction text is stored in `instruction` and true is returned; on any
// failure `instruction` is left empty and false is returned.
bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
                                                 const uint8_t* raw_bytes,
                                                 unsigned int raw_bytes_len,
                                                 string& instruction) {
  // Clear the output first so it is well-defined on every return path.
  instruction = "";

  // Without any bytes there is clearly no instruction to disassemble.
  const bool has_bytes = raw_bytes && raw_bytes_len != 0;
  if (!has_bytes) {
    return false;
  }

  // Value for objdump's -m option.
  const char* machine = nullptr;
  if (cpu == MD_CONTEXT_X86) {
    machine = "i386";
  } else if (cpu == MD_CONTEXT_AMD64) {
    machine = "i386:x86-64";
  }
  if (machine == nullptr) {
    BPLOG(ERROR) << "Unsupported architecture.";
    return false;
  }

  // One temporary file holds the raw bytes handed to objdump, the other
  // receives objdump's output.
  ScopedTmpFile raw_bytes_file("/tmp/breakpad_mem_region-raw_bytes-XXXXXX");
  ScopedTmpFile disassembly_file("/tmp/breakpad_mem_region-disassembly-XXXXXX");
  const bool files_ready = raw_bytes_file.Create() &&
                           disassembly_file.Create() &&
                           raw_bytes_file.Write(raw_bytes, raw_bytes_len);
  if (!files_ready) {
    BPLOG(ERROR) << "Failed creating temporary files.";
    return false;
  }

  char cmd[1024] = {0};
  snprintf(cmd, sizeof(cmd),
           "objdump -D --no-show-raw-insn -b binary -M intel -m %s %s > %s",
           machine, raw_bytes_file.path().c_str(),
           disassembly_file.path().c_str());
  if (system(cmd) != 0) {
    BPLOG(ERROR) << "Failed to call objdump.";
    return false;
  }

  // Match the instruction line, from:
  //    0:        lock cmpxchg DWORD PTR [esi+0x10],eax
  // extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
  std::regex instruction_regex(
      "^\\s+[0-9a-f]+:\\s+"  // "   0:"
      "((?:\\s*\\S*)+)$");   // "lock cmpxchg..."

  // Scan the output line by line; the first line that matches holds the
  // first instruction.
  std::ifstream objdump_stream(disassembly_file.path());
  std::string line;
  std::smatch match;
  while (getline(objdump_stream, line)) {
    if (std::regex_match(line, match, instruction_regex)) {
      instruction = match[1].str();
      return true;
    }
  }

  BPLOG(INFO) << "Failed to find instruction in objdump output.";
  return false;
}
// static
bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
string& operation, string& dest,
string& src) {
// Always initialize outputs.
operation = "";
dest = "";
src = "";
// Split the instruction into tokens by either whitespace or comma.
std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*");
std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(),
token_regex);
bool found_comma = false;
for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator();
++tokens_iter) {
auto token = (*tokens_iter)[1].str();
if (operation.size() == 0) {
if (IsInstructionPrefix(token))
continue;
operation = token;
} else if (dest.size() == 0) {
if (IsOperandSize(token))
continue;
dest = token;
} else if (!found_comma) {
if (token == ",") {
found_comma = true;
} else {
BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
" comma but found \""
<< token << "\"";
return false;
}
} else if (src.size() == 0) {
if (IsOperandSize(token))
continue;
src = token;
} else {
if (token == ",") {
BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
"unexpected comma after last operand.";
return false;
} else {
// We just ignore other junk after the last operand unless it's a
// comma, which would indicate we're probably still in the middle
// of the operands and something has gone wrong
}
}
}
if (found_comma && src.size() == 0) {
BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
"but no src operand.";
return false;
}
return true;
}
// static
// Evaluates the memory operand `expression` against the register values in
// `context` and stores the result in `address`. Returns false, with `address`
// set to 0, if the expression does not have the supported shape or refers to
// a segment or register that cannot be looked up.
bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
                                           const string& expression,
                                           uint64_t& address) {
  address = 0;

  // Extract the components of the expression.
  // fs:[esi+edi*4+0x80] -> ["fs", "esi", "edi", "4", "+", "0x80"]
  std::regex expression_regex(
      "^(?:(\\ws):)?"                // "fs:"
      "\\[(\\w+)"                    // "[esi"
      "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
      "(?:([\\+-])(0x[0-9a-f]+))?"   // "-0x80"
      "\\]$");                       // "]"

  std::smatch parts;
  const bool parsed = std::regex_match(expression, parts, expression_regex) &&
                      parts.size() == 7;
  if (!parsed) {
    return false;
  }

  const string segment = parts[1].str();
  const string base_register = parts[2].str();
  const string index_register = parts[3].str();
  const string stride_text = parts[4].str();
  const string sign = parts[5].str();
  const string displacement_text = parts[6].str();

  // The segment and index components are optional, the base register is not.
  uint64_t segment_base = 0;
  if (!segment.empty() && !GetSegmentAddress(context, segment, segment_base)) {
    return false;
  }

  uint64_t base = 0;
  if (!GetRegisterValue(context, base_register, base)) {
    return false;
  }

  uint64_t index = 0;
  if (!index_register.empty() &&
      !GetRegisterValue(context, index_register, index)) {
    return false;
  }

  const uint64_t stride =
      stride_text.empty() ? 1 : strtoull(stride_text.c_str(), nullptr, 0);
  const uint64_t displacement =
      displacement_text.empty()
          ? 0
          : strtoull(displacement_text.c_str(), nullptr, 0);

  uint64_t result = segment_base + base + (index * stride);
  if (sign == "+") {
    result += displacement;
  } else if (sign == "-") {
    result -= displacement;
  }

  address = result;
  return true;
}
// Reads up to kMaxX86InstructionLength bytes starting at `address` from
// `memory_region`, disassembles the first instruction in them for `cpu`, and
// splits it into operation_, dest_ and src_. If `memory_region` is null,
// `address` lies outside it, or disassembly fails, the object is left with
// empty fields. If only tokenization fails, the fields keep whatever
// TokenizeInstruction had parsed before the failure.
DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
                                         const MemoryRegion* memory_region,
                                         uint64_t address) {
  if (!memory_region) {
    return;
  }

  // Compare offsets rather than computing base + size, which could wrap.
  const uint64_t region_base = memory_region->GetBase();
  if (address < region_base ||
      address - region_base >= memory_region->GetSize()) {
    return;
  }

  uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
  size_t ip_bytes_length;
  for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
       ++ip_bytes_length) {
    // We have to read byte-by-byte here, since we still want to try and
    // disassemble an instruction even if we don't have enough bytes.
    if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
                                           &ip_bytes[ip_bytes_length])) {
      break;
    }
  }

  // Hand over only the bytes that were actually read. Passing the full buffer
  // would let zero padding be decoded as if it were part of memory.
  string instruction;
  if (!DisassembleInstruction(cpu, ip_bytes,
                              static_cast<unsigned int>(ip_bytes_length),
                              instruction)) {
    return;
  }

  if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
    return;
  }
}
// Evaluates the source operand (src_) as a memory operand against `context`
// and stores the result in `address`. Returns whatever CalculateAddress
// returns for that operand.
bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
                                              uint64_t& address) {
  const bool resolved = CalculateAddress(context, src_, address);
  return resolved;
}
// Evaluates the destination operand (dest_) as a memory operand against
// `context` and stores the result in `address`. Returns whatever
// CalculateAddress returns for that operand.
bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
                                               uint64_t& address) {
  const bool resolved = CalculateAddress(context, dest_, address);
  return resolved;
}
} // namespace google_breakpad
#else // __linux__
namespace google_breakpad {
// Stub used when not building for Linux: nothing is disassembled, so the
// arguments are ignored.
DisassemblerObjdump::DisassemblerObjdump(const uint32_t /*cpu*/,
                                         const MemoryRegion* /*memory_region*/,
                                         uint64_t /*address*/) {
}
// Stub used when not building for Linux: there is never a source address to
// compute. The out-parameter is taken by reference so that this definition
// matches the declaration in the header, and it is zeroed because the header
// documents that `address` is set to 0 on failure.
bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
                                              uint64_t& address) {
  address = 0;
  return false;
}
// Stub used when not building for Linux: there is never a destination address
// to compute. The out-parameter is taken by reference so that this definition
// matches the declaration in the header, and it is zeroed because the header
// documents that `address` is set to 0 on failure.
bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
                                               uint64_t& address) {
  address = 0;
  return false;
}
} // namespace google_breakpad
#endif // __linux__

View file

@ -0,0 +1,142 @@
// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// disassembler_objdump.h: Disassembler that invokes objdump for disassembly.
//
// Author: Mark Brand
#ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
#define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_
#include <string>
#include "common/using_std_string.h"
#include "google_breakpad/common/breakpad_types.h"
#include "google_breakpad/processor/dump_context.h"
#include "google_breakpad/processor/memory_region.h"
namespace google_breakpad {
// Disassembles a single instruction by invoking objdump.
//
// Currently supports disassembly for x86 and x86_64 on linux hosts only; on
// unsupported platforms or for unsupported architectures disassembly will
// fail.
//
// After a successful disassembly the operation and the destination and source
// operands can be queried, and for operands that refer to memory the address
// they refer to can be computed from a DumpContext.
//
// Example:
//   DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
//                                    instruction_ptr);
//   if (disassembler.IsValid()) {
//     uint64_t src_address = 0;
//     std::cerr << disassembler.operation() << " " << disassembler.dest()
//               << ", " << disassembler.src() << std::endl;
//     if (disassembler.CalculateSrcAddress(*context, src_address)) {
//       std::cerr << "[src_address = " << std::hex << src_address << "]\n";
//     }
//   }
class DisassemblerObjdump {
 public:
  // Construct a DisassemblerObjdump for the provided `cpu` type, where this
  // is one of MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Provided that `address` is
  // within `memory_region`, and the memory referenced is a valid instruction,
  // this will then be initialized with the disassembly for that instruction.
  DisassemblerObjdump(uint32_t cpu,
                      const MemoryRegion* memory_region,
                      uint64_t address);
  ~DisassemblerObjdump() = default;

  // If the source operand of the instruction is a memory operand, compute the
  // address referred to by the operand, and store this in `address`. On
  // success returns true, otherwise (if computation fails, or if the source
  // operand is not a memory operand) returns false and sets `address` to 0.
  bool CalculateSrcAddress(const DumpContext& context, uint64_t& address);

  // If the destination operand of the instruction is a memory operand,
  // compute the address referred to by the operand, and store this in
  // `address`. On success returns true, otherwise (if computation fails, or
  // if the destination operand is not a memory operand) returns false and
  // sets `address` to 0.
  bool CalculateDestAddress(const DumpContext& context, uint64_t& address);

  // True once an operation has been parsed out of the disassembly.
  bool IsValid() const { return !operation_.empty(); }

  // Returns the operation part of the disassembly, without any prefixes:
  //   "pop" eax
  //   lock "xchg" eax, edx
  const string& operation() const { return operation_; }

  // Returns the destination operand of the disassembly, without memory
  // operand size prefixes:
  //   mov DWORD PTR "[rax + 16]", edx
  const string& dest() const { return dest_; }

  // Returns the source operand of the disassembly, without memory operand
  // size prefixes:
  //   mov rax, QWORD PTR "[rdx]"
  const string& src() const { return src_; }

 private:
  // Grants the unit tests access to the static helpers below.
  friend class DisassemblerObjdumpForTest;

  // Writes out the provided `raw_bytes` to a temporary file, and executes
  // objdump to disassemble according to `cpu`, which must be either
  // MD_CONTEXT_X86 or MD_CONTEXT_AMD64. Once objdump has completed, parses
  // out the instruction string from the first instruction in the output and
  // stores it in `instruction`.
  static bool DisassembleInstruction(uint32_t cpu, const uint8_t* raw_bytes,
                                     unsigned int raw_bytes_len,
                                     string& instruction);

  // Splits an `instruction` into three parts, the "main" `operation` and
  // the `dest` and `src` operands.
  // Example:
  //   instruction = "lock cmpxchg QWORD PTR [rdi], rsi"
  //   operation = "cmpxchg", dest = "[rdi]", src = "rsi"
  static bool TokenizeInstruction(const string& instruction, string& operation,
                                  string& dest, string& src);

  // Compute the address referenced by `expression` in `context`.
  // Supports memory operands in the form
  //   (segment:)[base_reg(+index_reg*index_stride)(+-offset)]
  // Returns false if evaluation fails, or if the operand is not a supported
  // memory operand.
  static bool CalculateAddress(const DumpContext& context,
                               const string& expression,
                               uint64_t& address);

  // The parsed components of the disassembly for the instruction; all empty
  // until an instruction has been parsed.
  string operation_;
  string dest_;
  string src_;
};
} // namespace google_breakpad
#endif // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_OBJDUMP_H_

View file

@ -0,0 +1,464 @@
// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <unistd.h>
#include <vector>
#include "breakpad_googletest_includes.h"
#include "google_breakpad/common/breakpad_types.h"
#include "google_breakpad/common/minidump_cpu_amd64.h"
#include "google_breakpad/common/minidump_cpu_x86.h"
#include "google_breakpad/processor/dump_context.h"
#include "google_breakpad/processor/memory_region.h"
#include "processor/disassembler_objdump.h"
namespace google_breakpad {
// Test-only subclass that re-exports the static helpers of
// DisassemblerObjdump publicly so the tests can call them directly. It is
// only used through these static members.
class DisassemblerObjdumpForTest : public DisassemblerObjdump {
 public:
  using DisassemblerObjdump::DisassembleInstruction;
  using DisassemblerObjdump::TokenizeInstruction;
  using DisassemblerObjdump::CalculateAddress;
};
// MemoryRegion backed by an in-memory byte vector, for use in the tests.
// Only byte-sized reads are implemented.
class TestMemoryRegion : public MemoryRegion {
 public:
  // Creates a region that starts at address `base` and contains `bytes`.
  TestMemoryRegion(uint64_t base, std::vector<uint8_t> bytes);
  ~TestMemoryRegion() override = default;

  // Start address and length, in bytes, of the region.
  uint64_t GetBase() const override;
  uint32_t GetSize() const override;

  // Byte-sized read.
  bool GetMemoryAtAddress(uint64_t address, uint8_t* value) const override;

  // Wider reads: these overloads always return false.
  bool GetMemoryAtAddress(uint64_t address, uint16_t* value) const override;
  bool GetMemoryAtAddress(uint64_t address, uint32_t* value) const override;
  bool GetMemoryAtAddress(uint64_t address, uint64_t* value) const override;

  // Does nothing.
  void Print() const override;

 private:
  uint64_t base_;
  std::vector<uint8_t> bytes_;
};
// Stores the start address of the region together with a copy of its bytes.
TestMemoryRegion::TestMemoryRegion(uint64_t base, std::vector<uint8_t> bytes)
    : base_(base),
      bytes_(bytes) {
}
uint64_t TestMemoryRegion::GetBase() const {
return base_;
}
// Returns the number of bytes in the region, narrowed to 32 bits.
uint32_t TestMemoryRegion::GetSize() const {
  const auto byte_count = bytes_.size();
  return static_cast<uint32_t>(byte_count);
}
bool TestMemoryRegion::GetMemoryAtAddress(uint64_t address,
uint8_t* value) const {
if (address < GetBase() ||
address + sizeof(uint8_t) > GetBase() + GetSize()) {
return false;
}
memcpy(value, &bytes_[address - GetBase()], sizeof(uint8_t));
return true;
}
// The wider reads are not used by these tests, so the remaining overloads are
// left unimplemented and always report failure.
bool TestMemoryRegion::GetMemoryAtAddress(uint64_t /*address*/,
                                          uint16_t* /*value*/) const {
  // 16-bit reads are not implemented.
  return false;
}
bool TestMemoryRegion::GetMemoryAtAddress(uint64_t /*address*/,
                                          uint32_t* /*value*/) const {
  // 32-bit reads are not implemented.
  return false;
}
bool TestMemoryRegion::GetMemoryAtAddress(uint64_t /*address*/,
                                          uint64_t* /*value*/) const {
  // 64-bit reads are not implemented.
  return false;
}
void TestMemoryRegion::Print() const {
  // Intentionally prints nothing.
}
// Values loaded into the x86 test context. Each register gets a distinct
// value so that a test can tell which registers went into a computed address.

// x86 segment registers.
constexpr uint32_t kX86TestDs = 0x01000000;
constexpr uint32_t kX86TestEs = 0x02000000;
constexpr uint32_t kX86TestFs = 0x03000000;
constexpr uint32_t kX86TestGs = 0x04000000;

// x86 general purpose registers and instruction pointer.
constexpr uint32_t kX86TestEax = 0x00010101;
constexpr uint32_t kX86TestEbx = 0x00020202;
constexpr uint32_t kX86TestEcx = 0x00030303;
constexpr uint32_t kX86TestEdx = 0x00040404;
constexpr uint32_t kX86TestEsi = 0x00050505;
constexpr uint32_t kX86TestEdi = 0x00060606;
constexpr uint32_t kX86TestEsp = 0x00070707;
constexpr uint32_t kX86TestEbp = 0x00080808;
constexpr uint32_t kX86TestEip = 0x23230000;

// AMD64 general purpose registers and instruction pointer.
constexpr uint64_t kAMD64TestRax = 0x0000010101010101ULL;
constexpr uint64_t kAMD64TestRbx = 0x0000020202020202ULL;
constexpr uint64_t kAMD64TestRcx = 0x0000030303030303ULL;
constexpr uint64_t kAMD64TestRdx = 0x0000040404040404ULL;
constexpr uint64_t kAMD64TestRsi = 0x0000050505050505ULL;
constexpr uint64_t kAMD64TestRdi = 0x0000060606060606ULL;
constexpr uint64_t kAMD64TestRsp = 0x0000070707070707ULL;
constexpr uint64_t kAMD64TestRbp = 0x0000080808080808ULL;
constexpr uint64_t kAMD64TestR8 = 0x0000090909090909ULL;
constexpr uint64_t kAMD64TestR9 = 0x00000a0a0a0a0a0aULL;
constexpr uint64_t kAMD64TestR10 = 0x00000b0b0b0b0b0bULL;
constexpr uint64_t kAMD64TestR11 = 0x00000c0c0c0c0c0cULL;
constexpr uint64_t kAMD64TestR12 = 0x00000d0d0d0d0d0dULL;
constexpr uint64_t kAMD64TestR13 = 0x00000e0e0e0e0e0eULL;
constexpr uint64_t kAMD64TestR14 = 0x00000f0f0f0f0f0fULL;
constexpr uint64_t kAMD64TestR15 = 0x0000001010101010ULL;
constexpr uint64_t kAMD64TestRip = 0x0000000023230000ULL;
// DumpContext pre-populated with the test register values above.
class TestDumpContext : public DumpContext {
 public:
  // Builds an x86 context by default, or an AMD64 context when `x86_64` is
  // true.
  TestDumpContext(bool x86_64 = false);

  ~TestDumpContext() override;
};
// Allocates a raw CPU context (x86 by default, AMD64 when `x86_64` is true),
// fills in the test register values, installs it on this DumpContext and
// marks the context valid.
TestDumpContext::TestDumpContext(bool x86_64) {
  if (!x86_64) {
    MDRawContextX86* raw_context = new MDRawContextX86();
    // Zero the whole structure. sizeof(raw_context) would only cover the size
    // of the pointer.
    memset(raw_context, 0, sizeof(*raw_context));
    raw_context->context_flags = MD_CONTEXT_X86_FULL;

    raw_context->ds = kX86TestDs;
    raw_context->es = kX86TestEs;
    raw_context->fs = kX86TestFs;
    raw_context->gs = kX86TestGs;
    raw_context->eax = kX86TestEax;
    raw_context->ebx = kX86TestEbx;
    raw_context->ecx = kX86TestEcx;
    raw_context->edx = kX86TestEdx;
    raw_context->esi = kX86TestEsi;
    raw_context->edi = kX86TestEdi;
    raw_context->esp = kX86TestEsp;
    raw_context->ebp = kX86TestEbp;
    raw_context->eip = kX86TestEip;

    SetContextFlags(raw_context->context_flags);
    SetContextX86(raw_context);
    this->valid_ = true;
  } else {
    MDRawContextAMD64* raw_context = new MDRawContextAMD64();
    // Zero the whole structure. sizeof(raw_context) would only cover the size
    // of the pointer.
    memset(raw_context, 0, sizeof(*raw_context));
    raw_context->context_flags = MD_CONTEXT_AMD64_FULL;

    raw_context->rax = kAMD64TestRax;
    raw_context->rbx = kAMD64TestRbx;
    raw_context->rcx = kAMD64TestRcx;
    raw_context->rdx = kAMD64TestRdx;
    raw_context->rsi = kAMD64TestRsi;
    raw_context->rdi = kAMD64TestRdi;
    raw_context->rsp = kAMD64TestRsp;
    raw_context->rbp = kAMD64TestRbp;
    raw_context->r8 = kAMD64TestR8;
    raw_context->r9 = kAMD64TestR9;
    raw_context->r10 = kAMD64TestR10;
    raw_context->r11 = kAMD64TestR11;
    raw_context->r12 = kAMD64TestR12;
    raw_context->r13 = kAMD64TestR13;
    raw_context->r14 = kAMD64TestR14;
    raw_context->r15 = kAMD64TestR15;
    raw_context->rip = kAMD64TestRip;

    SetContextFlags(raw_context->context_flags);
    SetContextAMD64(raw_context);
    this->valid_ = true;
  }
}
// Hands the installed raw context back to FreeContext() on destruction.
TestDumpContext::~TestDumpContext() {
  this->FreeContext();
}
// A null/empty buffer must fail, and a single 0x58 byte must disassemble to
// the expected x86 instruction text.
TEST(DisassemblerObjdumpTest, DisassembleInstructionX86) {
  string disassembly;

  ASSERT_FALSE(DisassemblerObjdumpForTest::DisassembleInstruction(
      MD_CONTEXT_X86, nullptr, 0, disassembly));

  const uint8_t kPopEax[] = {0x58};
  ASSERT_TRUE(DisassemblerObjdumpForTest::DisassembleInstruction(
      MD_CONTEXT_X86, kPopEax, sizeof(kPopEax), disassembly));
  ASSERT_EQ(disassembly, "pop eax");
}
// A null/empty buffer must fail, and a single 0x58 byte must disassemble to
// the expected AMD64 instruction text.
TEST(DisassemblerObjdumpTest, DisassembleInstructionAMD64) {
  string disassembly;

  ASSERT_FALSE(DisassemblerObjdumpForTest::DisassembleInstruction(
      MD_CONTEXT_AMD64, nullptr, 0, disassembly));

  const uint8_t kPopRax[] = {0x58};
  ASSERT_TRUE(DisassemblerObjdumpForTest::DisassembleInstruction(
      MD_CONTEXT_AMD64, kPopRax, sizeof(kPopRax), disassembly));
  ASSERT_EQ(disassembly, "pop rax");
}
// Exercises TokenizeInstruction on one- and two-operand instructions, on the
// three parsing failure paths, and on prefixes, size keywords and trailing
// junk.
TEST(DisassemblerObjdumpTest, TokenizeInstruction) {
  string operation, dest, src;

  // Shorthand for tokenizing `text` into the three outputs above.
  const auto tokenize = [&operation, &dest, &src](const string& text) {
    return DisassemblerObjdumpForTest::TokenizeInstruction(text, operation,
                                                           dest, src);
  };

  ASSERT_TRUE(tokenize("pop eax"));
  ASSERT_EQ(operation, "pop");
  ASSERT_EQ(dest, "eax");

  ASSERT_TRUE(tokenize("mov eax, ebx"));
  ASSERT_EQ(operation, "mov");
  ASSERT_EQ(dest, "eax");
  ASSERT_EQ(src, "ebx");

  ASSERT_TRUE(tokenize("pop rax"));
  ASSERT_EQ(operation, "pop");
  ASSERT_EQ(dest, "rax");

  ASSERT_TRUE(tokenize("mov rax, rbx"));
  ASSERT_EQ(operation, "mov");
  ASSERT_EQ(dest, "rax");
  ASSERT_EQ(src, "rbx");

  // Test the three parsing failure paths
  ASSERT_FALSE(tokenize("mov rax,"));
  ASSERT_FALSE(tokenize("mov rax rbx"));
  ASSERT_FALSE(tokenize("mov rax, rbx, rcx"));

  // This is of course a nonsense instruction, but test that we do remove
  // multiple instruction prefixes and can handle multiple memory operands.
  ASSERT_TRUE(tokenize("rep lock mov DWORD PTR rax, QWORD PTR rbx"));
  ASSERT_EQ(operation, "mov");
  ASSERT_EQ(dest, "rax");
  ASSERT_EQ(src, "rbx");

  // Test that we ignore junk following a valid instruction
  ASSERT_TRUE(tokenize("mov rax, rbx ; junk here"));
  ASSERT_EQ(operation, "mov");
  ASSERT_EQ(dest, "rax");
  ASSERT_EQ(src, "rbx");
}
namespace x86 {
// Each region below holds the encoding of one 32-bit instruction, based at
// kX86TestEip. The comment above each definition gives its disassembly.

// mov eax, [esi]
const TestMemoryRegion load_reg(kX86TestEip, {0x8b, 0x06});

// mov eax, [esi+edi*4]
const TestMemoryRegion load_reg_index(kX86TestEip, {0x8b, 0x04, 0xbe});

// mov eax, [esi+0x10]
const TestMemoryRegion load_reg_offset(kX86TestEip, {0x8b, 0x46, 0x10});

// mov eax, [esi+edi*4-0x10]
const TestMemoryRegion load_reg_index_offset(kX86TestEip,
                                             {0x8b, 0x44, 0xbe, 0xf0});

// rep stosb
const TestMemoryRegion rep_stosb(kX86TestEip, {0xf3, 0xaa});

// lock cmpxchg [esi + 0x10], eax
const TestMemoryRegion lock_cmpxchg(kX86TestEip,
                                    {0xf0, 0x0f, 0xb1, 0x46, 0x10});

// call [esi+0x9999999]
const TestMemoryRegion call_reg_offset(kX86TestEip,
                                       {0xff, 0x96, 0x99, 0x99, 0x99, 0x09});
}  // namespace x86
// mov eax, [esi]: no destination address is expected, and the source address
// must equal the test value of esi.
TEST(DisassemblerObjdumpTest, X86LoadReg) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::load_reg, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kX86TestEsi);
}
// mov eax, [esi+edi*4]: no destination address is expected, and the source
// address must equal base plus scaled index.
TEST(DisassemblerObjdumpTest, X86LoadRegIndex) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::load_reg_index, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kX86TestEsi + (kX86TestEdi * 4));
}
// mov eax, [esi+0x10]: no destination address is expected, and the source
// address must equal base plus displacement.
TEST(DisassemblerObjdumpTest, X86LoadRegOffset) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::load_reg_offset, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kX86TestEsi + 0x10);
}
// mov eax, [esi+edi*4-0x10]: no destination address is expected, and the
// source address must equal base plus scaled index minus the displacement.
TEST(DisassemblerObjdumpTest, X86LoadRegIndexOffset) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::load_reg_index_offset, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kX86TestEsi + (kX86TestEdi * 4) - 0x10);
}
// rep stosb: a destination address of es + edi is expected, and no source
// address.
TEST(DisassemblerObjdumpTest, X86RepStosb) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::rep_stosb, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kX86TestEs + kX86TestEdi);
}
// lock cmpxchg [esi + 0x10], eax: a destination address of esi + 0x10 is
// expected, and no source address.
TEST(DisassemblerObjdumpTest, X86LockCmpxchg) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::lock_cmpxchg, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kX86TestEsi + 0x10);
}
// call [esi+0x9999999]: the memory operand is reported as the destination
// address, and no source address is expected.
TEST(DisassemblerObjdumpTest, X86CallRegOffset) {
  TestDumpContext dump_context;
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &x86::call_reg_offset, kX86TestEip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kX86TestEsi + 0x9999999);
}
namespace amd64 {
// Each region below holds the encoding of one 64-bit instruction, based at
// kAMD64TestRip. The comment above each definition gives its disassembly.

// mov rax, [rsi]
const TestMemoryRegion load_reg(kAMD64TestRip, {0x48, 0x8b, 0x06});

// mov rax, [rsi+rdi*4]
const TestMemoryRegion load_reg_index(kAMD64TestRip,
                                      {0x48, 0x8b, 0x04, 0xbe});

// mov rax, [rip+0x10]
const TestMemoryRegion load_rip_relative(
    kAMD64TestRip, {0x48, 0x8b, 0x05, 0x10, 0x00, 0x00, 0x00});

// mov rax, [rsi+rdi*4-0x10]
const TestMemoryRegion load_reg_index_offset(
    kAMD64TestRip, {0x48, 0x8b, 0x44, 0xbe, 0xf0});

// rep stosb
const TestMemoryRegion rep_stosb(kAMD64TestRip, {0xf3, 0xaa});

// lock cmpxchg [rsi + 0x10], rax
const TestMemoryRegion lock_cmpxchg(kAMD64TestRip,
                                    {0xf0, 0x48, 0x0f, 0xb1, 0x46, 0x10});

// call [rsi+0x9999999]
const TestMemoryRegion call_reg_offset(kAMD64TestRip,
                                       {0xff, 0x96, 0x99, 0x99, 0x99, 0x09});
}  // namespace amd64
// mov rax, [rsi]: no destination address is expected, and the source address
// must equal the test value of rsi.
TEST(DisassemblerObjdumpTest, AMD64LoadReg) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::load_reg, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kAMD64TestRsi);
}
// mov rax, [rsi+rdi*4]: no destination address is expected, and the source
// address must equal base plus scaled index.
TEST(DisassemblerObjdumpTest, AMD64LoadRegIndex) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::load_reg_index, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kAMD64TestRsi + (kAMD64TestRdi * 4));
}
// mov rax, [rip+0x10]: no destination address is expected, and the source
// address must equal kAMD64TestRip + 0x10.
// NOTE(review): on hardware, a RIP-relative displacement is applied to the
// address of the *next* instruction, whereas this expectation adds it to the
// address of the instruction itself. Confirm that this is the intended
// behaviour of DisassemblerObjdump.
TEST(DisassemblerObjdumpTest, AMD64LoadRipRelative) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::load_rip_relative, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kAMD64TestRip + 0x10);
}
// mov rax, [rsi+rdi*4-0x10]: no destination address is expected, and the
// source address must equal base plus scaled index minus the displacement.
TEST(DisassemblerObjdumpTest, AMD64LoadRegIndexOffset) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::load_reg_index_offset,
                                   kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_FALSE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_TRUE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(src_address, kAMD64TestRsi + (kAMD64TestRdi * 4) - 0x10);
}
// rep stosb: a destination address equal to rdi is expected, and no source
// address.
TEST(DisassemblerObjdumpTest, AMD64RepStosb) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::rep_stosb, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kAMD64TestRdi);
}
// lock cmpxchg [rsi + 0x10], rax: a destination address of rsi + 0x10 is
// expected, and no source address.
TEST(DisassemblerObjdumpTest, AMD64LockCmpxchg) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::lock_cmpxchg, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kAMD64TestRsi + 0x10);
}
// call [rsi+0x9999999]: the memory operand is reported as the destination
// address, and no source address is expected.
TEST(DisassemblerObjdumpTest, AMD64CallRegOffset) {
  TestDumpContext dump_context(true);
  DisassemblerObjdump disassembler(dump_context.GetContextCPU(),
                                   &amd64::call_reg_offset, kAMD64TestRip);
  uint64_t src_address = 0;
  uint64_t dest_address = 0;
  ASSERT_TRUE(disassembler.CalculateDestAddress(dump_context, dest_address));
  ASSERT_FALSE(disassembler.CalculateSrcAddress(dump_context, src_address));
  ASSERT_EQ(dest_address, kAMD64TestRsi + 0x9999999);
}
} // namespace google_breakpad

View file

@ -35,21 +35,13 @@
#include "processor/exploitability_linux.h"
#ifndef _WIN32
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <sstream>
#include <iterator>
#endif // _WIN32
#include <string.h>
#include "google_breakpad/common/minidump_exception_linux.h"
#include "google_breakpad/processor/call_stack.h"
#include "google_breakpad/processor/process_state.h"
#include "google_breakpad/processor/stack_frame.h"
#include "processor/disassembler_objdump.h"
#include "processor/logging.h"
namespace {
@ -67,11 +59,6 @@ constexpr char kStackCheckFailureFunction[] = "__stack_chk_fail";
// can determine that the call would overflow the target buffer.
constexpr char kBoundsCheckFailureFunction[] = "__chk_fail";
#ifndef _WIN32
const unsigned int MAX_INSTRUCTION_LEN = 15;
const unsigned int MAX_OBJDUMP_BUFFER_LEN = 4096;
#endif // _WIN32
} // namespace
namespace google_breakpad {
@ -198,69 +185,30 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
BPLOG(INFO) << "No exception or architecture data.";
return false;
}
// Check architecture and set architecture variable to corresponding flag
// in objdump.
switch (context->GetContextCPU()) {
case MD_CONTEXT_X86:
architecture = "i386";
break;
case MD_CONTEXT_AMD64:
architecture = "i386:x86-64";
break;
default:
// Unsupported architecture. Note that ARM architectures are not
// supported because objdump does not support ARM.
return false;
}
// Get memory region around instruction pointer and the number of bytes
// before and after the instruction pointer in the memory region.
const uint8_t* raw_memory = memory_region->GetMemory();
const uint64_t base = memory_region->GetBase();
if (base > instruction_ptr) {
BPLOG(ERROR) << "Memory region base value exceeds instruction pointer.";
return false;
}
const uint64_t offset = instruction_ptr - base;
if (memory_region->GetSize() < MAX_INSTRUCTION_LEN + offset) {
BPLOG(INFO) << "Not enough bytes left to guarantee complete instruction.";
DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region,
instruction_ptr);
if (!disassembler.IsValid()) {
BPLOG(INFO) << "Disassembling fault instruction failed.";
return false;
}
// Convert bytes into objdump output.
char objdump_output_buffer[MAX_OBJDUMP_BUFFER_LEN] = {0};
DisassembleBytes(architecture,
raw_memory + offset,
MAX_INSTRUCTION_LEN,
MAX_OBJDUMP_BUFFER_LEN,
objdump_output_buffer);
string line;
if (!GetObjdumpInstructionLine(objdump_output_buffer, &line)) {
return false;
}
// Convert objdump instruction line into the operation and operands.
string instruction = "";
string dest = "";
string src = "";
TokenizeObjdumpInstruction(line, &instruction, &dest, &src);
// Check if the operation is a write to memory. First, the instruction
// must be one that can write to memory. Second, the write destination
// must be a spot in memory rather than a register. Since there are no
// symbols from objdump, the destination will be enclosed by brackets.
if (dest.size() > 2 && dest.at(0) == '[' && dest.at(dest.size() - 1) == ']' &&
(!instruction.compare("mov") || !instruction.compare("inc") ||
!instruction.compare("dec") || !instruction.compare("and") ||
!instruction.compare("or") || !instruction.compare("xor") ||
!instruction.compare("not") || !instruction.compare("neg") ||
!instruction.compare("add") || !instruction.compare("sub") ||
!instruction.compare("shl") || !instruction.compare("shr"))) {
// Strip away enclosing brackets from the destination address.
dest = dest.substr(1, dest.size() - 2);
// Check if the operation is a write to memory.
// First, the instruction must be one that can write to memory.
auto instruction = disassembler.operation();
if (!instruction.compare("mov") || !instruction.compare("inc") ||
!instruction.compare("dec") || !instruction.compare("and") ||
!instruction.compare("or") || !instruction.compare("xor") ||
!instruction.compare("not") || !instruction.compare("neg") ||
!instruction.compare("add") || !instruction.compare("sub") ||
!instruction.compare("shl") || !instruction.compare("shr")) {
uint64_t write_address = 0;
CalculateAddress(dest, *context, &write_address);
// Check that the destination is a memory address. CalculateDestAddress will
// return false if the destination is not a memory address.
if (!disassembler.CalculateDestAddress(*context, write_address)) {
return false;
}
// If the program crashed as a result of a write, the destination of
// the write must have been an address that did not permit writing.
@ -268,271 +216,14 @@ bool ExploitabilityLinux::EndedOnIllegalWrite(uint64_t instruction_ptr) {
// the crash does not suggest exploitability for writes with such a
// low target address.
return write_address > 4096;
} else {
return false;
}
#endif // _WIN32
return false;
}
#ifndef _WIN32
// Evaluates an address expression of the form "reg", "reg+a" or "reg-a",
// where |reg| is a full-width general-purpose register (or the instruction
// pointer) of the architecture reported by |context| and |a| is a hexadecimal
// constant, and stores the result in |*write_address|.
//
// Returns false, leaving |*write_address| untouched, when |write_address| is
// null, when the constant cannot be parsed, when the register name is not
// recognised, or when the context is neither x86 nor AMD64.
bool ExploitabilityLinux::CalculateAddress(const string& address_expression,
                                           const DumpContext& context,
                                           uint64_t* write_address) {
  // The destination should be the format reg+a or reg-a, where reg
  // is a register and a is a hexadecimal constant. Although more complex
  // expressions can make valid instructions, objdump's disassembly outputs
  // it in this simpler format.
  // TODO(liuandrew): Handle more complex formats, should they arise.

  if (!write_address) {
    BPLOG(ERROR) << "Null parameter.";
    return false;
  }

  // Clone parameter into a non-const string.
  string expression = address_expression;

  // Parse out the constant that is added to the address (if it exists).
  size_t delim = expression.find('+');
  bool positive_add_constant = true;
  // Check if constant is subtracted instead of added.
  if (delim == string::npos) {
    positive_add_constant = false;
    delim = expression.find('-');
  }
  uint32_t add_constant = 0;
  // Save constant and remove it from the expression.
  if (delim != string::npos) {
    // sscanf returns the number of conversions performed, or EOF (a negative
    // value) when the input ends before the first conversion, e.g. for
    // "reg+". Only a return value of exactly 1 means a constant was read, so
    // test for that rather than for "non-zero".
    if (sscanf(expression.substr(delim + 1).c_str(), "%x", &add_constant) !=
        1) {
      BPLOG(ERROR) << "Failed to scan constant.";
      return false;
    }
    expression = expression.substr(0, delim);
  }

  // Set the write address to the corresponding register.
  // TODO(liuandrew): Add support for partial registers, such as
  // the rax/eax/ax/ah/al chain.
  switch (context.GetContextCPU()) {
    case MD_CONTEXT_X86:
      if (!expression.compare("eax")) {
        *write_address = context.GetContextX86()->eax;
      } else if (!expression.compare("ebx")) {
        *write_address = context.GetContextX86()->ebx;
      } else if (!expression.compare("ecx")) {
        *write_address = context.GetContextX86()->ecx;
      } else if (!expression.compare("edx")) {
        *write_address = context.GetContextX86()->edx;
      } else if (!expression.compare("edi")) {
        *write_address = context.GetContextX86()->edi;
      } else if (!expression.compare("esi")) {
        *write_address = context.GetContextX86()->esi;
      } else if (!expression.compare("ebp")) {
        *write_address = context.GetContextX86()->ebp;
      } else if (!expression.compare("esp")) {
        *write_address = context.GetContextX86()->esp;
      } else if (!expression.compare("eip")) {
        *write_address = context.GetContextX86()->eip;
      } else {
        BPLOG(ERROR) << "Unsupported register";
        return false;
      }
      break;
    case MD_CONTEXT_AMD64:
      if (!expression.compare("rax")) {
        *write_address = context.GetContextAMD64()->rax;
      } else if (!expression.compare("rbx")) {
        *write_address = context.GetContextAMD64()->rbx;
      } else if (!expression.compare("rcx")) {
        *write_address = context.GetContextAMD64()->rcx;
      } else if (!expression.compare("rdx")) {
        *write_address = context.GetContextAMD64()->rdx;
      } else if (!expression.compare("rdi")) {
        *write_address = context.GetContextAMD64()->rdi;
      } else if (!expression.compare("rsi")) {
        *write_address = context.GetContextAMD64()->rsi;
      } else if (!expression.compare("rbp")) {
        *write_address = context.GetContextAMD64()->rbp;
      } else if (!expression.compare("rsp")) {
        *write_address = context.GetContextAMD64()->rsp;
      } else if (!expression.compare("rip")) {
        *write_address = context.GetContextAMD64()->rip;
      } else if (!expression.compare("r8")) {
        *write_address = context.GetContextAMD64()->r8;
      } else if (!expression.compare("r9")) {
        *write_address = context.GetContextAMD64()->r9;
      } else if (!expression.compare("r10")) {
        *write_address = context.GetContextAMD64()->r10;
      } else if (!expression.compare("r11")) {
        *write_address = context.GetContextAMD64()->r11;
      } else if (!expression.compare("r12")) {
        *write_address = context.GetContextAMD64()->r12;
      } else if (!expression.compare("r13")) {
        *write_address = context.GetContextAMD64()->r13;
      } else if (!expression.compare("r14")) {
        *write_address = context.GetContextAMD64()->r14;
      } else if (!expression.compare("r15")) {
        *write_address = context.GetContextAMD64()->r15;
      } else {
        BPLOG(ERROR) << "Unsupported register";
        return false;
      }
      break;
    default:
      // This should not occur since the same switch condition
      // should have terminated this method.
      return false;
  }

  // Add or subtract constant from write address (if applicable). When no
  // constant was present |add_constant| is zero, so the subtraction branch
  // leaves the register value unchanged.
  *write_address =
      positive_add_constant ?
      *write_address + add_constant : *write_address - add_constant;

  return true;
}
// static
bool ExploitabilityLinux::GetObjdumpInstructionLine(
const char* objdump_output_buffer,
string* instruction_line) {
// Put buffer data into stream to output line-by-line.
std::stringstream objdump_stream;
objdump_stream.str(string(objdump_output_buffer));
// Pipe each output line into the string until the string contains the first
// instruction from objdump. All lines before the "<.data>:" section are
// skipped. Loop until the line shows the first instruction or there are no
// lines left.
bool data_section_seen = false;
do {
if (!getline(objdump_stream, *instruction_line)) {
BPLOG(INFO) << "Objdump instructions not found";
return false;
}
if (instruction_line->find("<.data>:") != string::npos) {
data_section_seen = true;
}
} while (!data_section_seen || instruction_line->find("0:") == string::npos);
// This first instruction contains the above substring.
return true;
}
bool ExploitabilityLinux::TokenizeObjdumpInstruction(const string& line,
string* operation,
string* dest,
string* src) {
if (!operation || !dest || !src) {
BPLOG(ERROR) << "Null parameters passed.";
return false;
}
// Set all pointer values to empty strings.
*operation = "";
*dest = "";
*src = "";
// Tokenize the objdump line.
vector<string> tokens;
std::istringstream line_stream(line);
copy(std::istream_iterator<string>(line_stream),
std::istream_iterator<string>(),
std::back_inserter(tokens));
// Regex for the data in hex form. Each byte is two hex digits.
regex_t regex;
regcomp(&regex, "^[[:xdigit:]]{2}$", REG_EXTENDED | REG_NOSUB);
// Find and set the location of the operator. The operator appears
// directly after the chain of bytes that define the instruction. The
// operands will be the last token, given that the instruction has operands.
// If not, the operator is the last token. The loop skips the first token
// because the first token is the instruction number (namely "0:").
string operands = "";
for (size_t i = 1; i < tokens.size(); i++) {
// Check if current token no longer is in byte format.
if (regexec(&regex, tokens[i].c_str(), 0, NULL, 0)) {
// instruction = tokens[i];
*operation = tokens[i];
// If the operator is the last token, there are no operands.
if (i != tokens.size() - 1) {
operands = tokens[tokens.size() - 1];
}
break;
}
}
regfree(&regex);
if (operation->empty()) {
BPLOG(ERROR) << "Failed to parse out operation from objdump instruction.";
return false;
}
// Split operands into source and destination (if applicable).
if (!operands.empty()) {
size_t delim = operands.find(',');
if (delim == string::npos) {
*dest = operands;
} else {
*dest = operands.substr(0, delim);
*src = operands.substr(delim + 1);
}
}
return true;
}
// Disassembles |raw_bytes_len| bytes starting at |raw_bytes| by writing them
// to a temporary file and running
//   objdump -D -b binary -M intel -m <architecture> <tempfile>
// through popen(). Up to |buffer_len| - 1 bytes of objdump's output are
// copied into |objdump_output_buffer|, which is always NUL-terminated on
// success.
//
// Returns false when a pointer is null, |buffer_len| is zero,
// |raw_bytes_len| exceeds MAX_INSTRUCTION_LEN, or any of the file, process or
// read steps fails. The temporary file is removed on every path after it has
// been created.
//
// |architecture| is inserted unquoted into a shell command line, so callers
// must only pass trusted, fixed strings.
bool ExploitabilityLinux::DisassembleBytes(const string& architecture,
                                           const uint8_t* raw_bytes,
                                           const unsigned int raw_bytes_len,
                                           const unsigned int buffer_len,
                                           char* objdump_output_buffer) {
  if (!raw_bytes || !objdump_output_buffer || buffer_len == 0 ||
      raw_bytes_len > MAX_INSTRUCTION_LEN) {
    BPLOG(ERROR) << "Bad input parameters.";
    return false;
  }

  // Write raw bytes around instruction pointer to a temporary file to
  // pass as an argument to objdump.
  char raw_bytes_tmpfile[] = "/tmp/breakpad_mem_region-raw_bytes-XXXXXX";
  const int raw_bytes_fd = mkstemp(raw_bytes_tmpfile);
  if (raw_bytes_fd < 0) {
    // No file was created, so there is nothing to unlink.
    BPLOG(ERROR) << "Failed to create tempfile.";
    return false;
  }

  // Casting raw_bytes_len to `ssize_t` won't cause a sign flip, since we check
  // its bounds above.
  const ssize_t bytes_written = write(raw_bytes_fd, raw_bytes, raw_bytes_len);
  // The descriptor is no longer needed once the bytes are written; objdump
  // opens the file by name. Closing it here avoids leaking one descriptor per
  // call and keeps it from being inherited by the objdump child process.
  close(raw_bytes_fd);
  if (bytes_written != static_cast<ssize_t>(raw_bytes_len)) {
    BPLOG(ERROR) << "Writing of raw bytes failed.";
    unlink(raw_bytes_tmpfile);
    return false;
  }

  char cmd[1024] = {0};
  snprintf(cmd,
           sizeof(cmd),
           "objdump -D -b binary -M intel -m %s %s",
           architecture.c_str(),
           raw_bytes_tmpfile);
  FILE* objdump_fp = popen(cmd, "r");
  if (!objdump_fp) {
    unlink(raw_bytes_tmpfile);
    BPLOG(ERROR) << "Failed to call objdump.";
    return false;
  }

  // Reserve one byte for the terminator so that callers can always treat the
  // buffer as a C string, even when objdump produces more output than fits.
  const size_t bytes_read =
      fread(objdump_output_buffer, 1, buffer_len - 1, objdump_fp);
  pclose(objdump_fp);
  unlink(raw_bytes_tmpfile);
  if (bytes_read == 0) {
    BPLOG(ERROR) << "Failed to read objdump output.";
    return false;
  }
  objdump_output_buffer[bytes_read] = '\0';

  return true;
}
#endif // _WIN32
bool ExploitabilityLinux::StackPointerOffStack(uint64_t stack_ptr) {
MinidumpLinuxMapsList* linux_maps_list = dump_->GetLinuxMapsList();
// Inconclusive if there are no mappings available.

View file

@ -75,42 +75,6 @@ class ExploitabilityLinux : public Exploitability {
// instruction is at a spot in memory that prohibits writes.
bool EndedOnIllegalWrite(uint64_t instruction_ptr);
#ifndef _WIN32
// Disassembles |raw_bytes_len| bytes at |raw_bytes| by writing them to a
// temporary file, running objdump on that file with |architecture| as the
// value of its -m option, and reading up to |MAX_OBJDUMP_BUFFER_LEN| bytes of
// objdump's output into |objdump_output_buffer|. Returns false if |raw_bytes|
// or |objdump_output_buffer| is null, if |raw_bytes_len| is larger than the
// maximum instruction length, or if creating the file, starting objdump or
// reading its output fails. The caller owns all pointers.
static bool DisassembleBytes(const string& architecture,
const uint8_t* raw_bytes,
const unsigned int raw_bytes_len,
const unsigned int MAX_OBJDUMP_BUFFER_LEN,
char* objdump_output_buffer);
// Parses the objdump output given in |objdump_output_buffer| and extracts
// the line of the first instruction into |instruction_line|: the first line
// containing "0:" that is found on or after the line containing "<.data>:".
// Returns true when the instruction line is successfully extracted and false
// when the output ends before such a line is seen.
static bool GetObjdumpInstructionLine(
const char* objdump_output_buffer,
string* instruction_line);
// Tokenizes out the operation and operands from a line of instruction
// disassembled by objdump. The leading offset token and the two-hex-digit
// byte tokens are skipped; the next token is stored in |operation|, and the
// last token of the line (if any follows the operation) is split at its
// first comma into |dest| and |src|. All three outputs are cleared first.
// Returns false if any pointer is null or no operation is found.
// The caller owns all pointers.
static bool TokenizeObjdumpInstruction(const string& line,
string* operation,
string* dest,
string* src);
// Calculates the effective address of an expression in the form reg+a or
// reg-a, where 'reg' is a register and 'a' is a hexadecimal constant, and
// writes the result in the pointer. Register values are read from |context|
// according to its CPU type (x86 or AMD64). The method returns whether the
// calculation was a success; it fails for a null pointer, an unparsable
// constant, an unsupported register name or an unsupported CPU type.
// The caller owns the pointer.
static bool CalculateAddress(const string& address_expression,
const DumpContext& context,
uint64_t* write_address);
#endif // _WIN32
// Checks if the stack pointer points to a memory mapping that is not
// labelled as the stack.
bool StackPointerOffStack(uint64_t stack_ptr);

View file

@ -44,15 +44,6 @@
#ifdef __linux__
namespace google_breakpad {
// Test-only subclass of ExploitabilityLinux. Its public using-declarations
// make the objdump helper methods of the base class callable from the unit
// tests in this file as ExploitabilityLinuxTest::<method>.
class ExploitabilityLinuxTest : public ExploitabilityLinux {
public:
using ExploitabilityLinux::CalculateAddress;
using ExploitabilityLinux::DisassembleBytes;
using ExploitabilityLinux::GetObjdumpInstructionLine;
using ExploitabilityLinux::TokenizeObjdumpInstruction;
};
class ExploitabilityLinuxTestMinidumpContext : public MinidumpContext {
public:
explicit ExploitabilityLinuxTestMinidumpContext(
@ -70,7 +61,6 @@ namespace {
using google_breakpad::BasicSourceLineResolver;
#ifdef __linux__
using google_breakpad::ExploitabilityLinuxTest;
using google_breakpad::ExploitabilityLinuxTestMinidumpContext;
#endif // __linux__
using google_breakpad::MinidumpProcessor;
@ -185,120 +175,4 @@ TEST(ExploitabilityTest, TestLinuxEngine) {
#endif // __linux__
}
#ifdef __linux__
// Checks that DisassembleBytes rejects null inputs and that, for a known
// six-byte AMD64 instruction, the line following the "<.data>" header in the
// objdump output is the expected disassembly.
TEST(ExploitabilityLinuxUtilsTest, DisassembleBytesTest) {
  ASSERT_FALSE(
      ExploitabilityLinuxTest::DisassembleBytes("", nullptr, 0, 5, nullptr));
  uint8_t bytes[6] = {0xc7, 0x0, 0x5, 0x0, 0x0, 0x0};
  char buffer[1024] = {0};
  ASSERT_TRUE(ExploitabilityLinuxTest::DisassembleBytes(
      "i386:x86-64", bytes, std::extent<decltype(bytes)>::value, 1024, buffer));
  std::stringstream objdump_stream;
  objdump_stream.str(string(buffer));
  string line = "";
  // Skip ahead to the "<.data>" header. Fail the test instead of spinning
  // forever if the output ends without one: a failed getline leaves |line|
  // unchanged, so an unchecked loop would never terminate.
  while (line.find("<.data>") == string::npos) {
    ASSERT_TRUE(static_cast<bool>(getline(objdump_stream, line)))
        << "objdump output has no <.data> section header";
  }
  // The first instruction is on the line right after the header.
  ASSERT_TRUE(static_cast<bool>(getline(objdump_stream, line)));
  ASSERT_EQ(line, " 0:\tc7 00 05 00 00 00 \tmov DWORD PTR [rax],0x5");
}
// Feeds GetObjdumpInstructionLine two canned objdump listings: one where the
// "0:" instruction line follows the "<.data>:" header (expected to succeed),
// and one where it only precedes the header (expected to fail).
TEST(ExploitabilityLinuxUtilsTest, GetObjdumpInstructionLine) {
  string extracted_line;

  // Well-formed listing. A stray "0:" appears before the header to make sure
  // it is not mistaken for the instruction line.
  const string well_formed_disassembly =
      "\n"
      "/tmp/breakpad_mem_region-raw_bytes-tMmMo0: file format binary\n"
      "// Trying to confuse the parser 0:\n"
      "\n"
      "Disassembly of section .data:\n"
      "\n"
      "0000000000000000 <.data>:\n"
      " 0:\tc7 00 01 00 00 00 \tmov DWORD PTR [rax],0x1\n"
      " 6:\t5d \tpop rbp\n"
      " 7:\tc3 \tret \n"
      " 8:\t55 \tpush rbp\n"
      " 9:\t48 89 e5 \tmov rbp,rsp\n"
      " c:\t53 \tpush rbx\n"
      " d:\t48 \trex.W\n"
      " e:\t81 \t.byte 0x81\n";
  EXPECT_TRUE(ExploitabilityLinuxTest::GetObjdumpInstructionLine(
      well_formed_disassembly.c_str(), &extracted_line));
  EXPECT_EQ(" 0:\tc7 00 01 00 00 00 \tmov DWORD PTR [rax],0x1",
            extracted_line);

  // There is no "0:" after "<.data>:". Expected to return false.
  const string header_after_instruction =
      "\n"
      "/tmp/breakpad_mem_region-raw_bytes-tMmMo0: file format binary\n"
      "// Trying to confuse the parser 0:\n"
      "\n"
      "Disassembly of section .data:\n"
      "\n"
      " 0:\tc7 00 01 00 00 00 \tmov DWORD PTR [rax],0x1\n"
      " 6:\t5d \tpop rbp\n"
      " 7:\tc3 \tret \n"
      " 8:\t55 \tpush rbp\n"
      " 9:\t48 89 e5 \tmov rbp,rsp\n"
      " d:\t48 \trex.W\n"
      "0000000000000000 <.data>:\n"
      " c:\t53 \tpush rbx\n";
  EXPECT_FALSE(ExploitabilityLinuxTest::GetObjdumpInstructionLine(
      header_after_instruction.c_str(), &extracted_line));
}
// Checks TokenizeObjdumpInstruction against null outputs and against objdump
// lines with two operands, no operands and a single operand.
TEST(ExploitabilityLinuxUtilsTest, TokenizeObjdumpInstructionTest) {
  // Null output pointers are rejected.
  ASSERT_FALSE(ExploitabilityLinuxTest::TokenizeObjdumpInstruction(
      "", NULL, NULL, NULL));

  string op = "";
  string destination = "";
  string source = "";

  // Two operands: destination and source are separated by the comma.
  string objdump_line = "0: c7 00 05 00 00 00 mov DWORD PTR [rax],0x5";
  ASSERT_TRUE(ExploitabilityLinuxTest::TokenizeObjdumpInstruction(
      objdump_line, &op, &destination, &source));
  ASSERT_EQ(op, "mov");
  ASSERT_EQ(destination, "[rax]");
  ASSERT_EQ(source, "0x5");

  // No operands: both operand outputs are cleared.
  objdump_line = "0: c3 ret";
  ASSERT_TRUE(ExploitabilityLinuxTest::TokenizeObjdumpInstruction(
      objdump_line, &op, &destination, &source));
  ASSERT_EQ(op, "ret");
  ASSERT_EQ(destination, "");
  ASSERT_EQ(source, "");

  // One operand: it is reported as the destination.
  objdump_line = "0: 5f pop rdi";
  ASSERT_TRUE(ExploitabilityLinuxTest::TokenizeObjdumpInstruction(
      objdump_line, &op, &destination, &source));
  ASSERT_EQ(op, "pop");
  ASSERT_EQ(destination, "rdi");
  ASSERT_EQ(source, "");
}
// Checks CalculateAddress on an AMD64 context whose rdx is 12345: a null
// output pointer is rejected, "rdx-0x4D2" and "rdx+0x4D2" evaluate to
// 12345 -/+ 1234, and expressions whose constant is not hexadecimal or whose
// base is not a register are rejected.
TEST(ExploitabilityLinuxUtilsTest, CalculateAddressTest) {
  // Value-initialize the raw context so that every register other than rdx
  // has a defined (zero) value before the context wrapper receives it.
  MDRawContextAMD64 raw_context = {};
  raw_context.rdx = 12345;
  ExploitabilityLinuxTestMinidumpContext context(raw_context);
  ASSERT_EQ(context.GetContextAMD64()->rdx, 12345U);
  ASSERT_FALSE(ExploitabilityLinuxTest::CalculateAddress("", context, NULL));
  uint64_t write_address = 0;
  ASSERT_TRUE(ExploitabilityLinuxTest::CalculateAddress("rdx-0x4D2",
                                                        context,
                                                        &write_address));
  ASSERT_EQ(write_address, 11111U);
  ASSERT_TRUE(ExploitabilityLinuxTest::CalculateAddress("rdx+0x4D2",
                                                        context,
                                                        &write_address));
  ASSERT_EQ(write_address, 13579U);
  // "rax" is not a hexadecimal constant.
  ASSERT_FALSE(ExploitabilityLinuxTest::CalculateAddress("rdx+rax",
                                                         context,
                                                         &write_address));
  // "0x3482" is not a register name.
  ASSERT_FALSE(ExploitabilityLinuxTest::CalculateAddress("0x3482+0x4D2",
                                                         context,
                                                         &write_address));
}
#endif // __linux__
} // namespace

View file

@ -51,6 +51,8 @@
'contained_range_map.h',
'convert_old_arm64_context.cc',
'convert_old_arm64_context.h',
'disassembler_objdump.cc',
'disassembler_objdump.h',
'disassembler_x86.cc',
'disassembler_x86.h',
'dump_context.cc',
@ -147,6 +149,7 @@
'basic_source_line_resolver_unittest.cc',
'cfi_frame_info_unittest.cc',
'contained_range_map_unittest.cc',
'disassembler_objdump_unittest.cc',
'disassembler_x86_unittest.cc',
'exploitability_unittest.cc',
'fast_source_line_resolver_unittest.cc',