breakpad/src/processor/disassembler_objdump.cc
Ian Barkley-Yeung f5123d7196 Add #include <config.h> to the beginning of all cc files
Added
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
to the beginning of all source files that didn't have it.

This ensures that configuration options are respected in all source
files. In particular, it ensures that the defines needed to fix Large
File System issues are set before including system headers.

More generally, it ensures consistency between the source files, and
avoids the possibility of ODR violations between source files that were
including config.h and source files that were not.

Process:
Ran
find . \( -name third_party -prune \) -o \( -name '.git*' -prune \) -o \( \( -name '*.cc' -o -name '*.c' \) -exec sed -i '0,/^#include/ s/^#include/#ifdef HAVE_CONFIG_H\n#include <config.h>  \/\/ Must come first\n#endif\n\n#include/' {} + \)
and then manually fixed up src/common/linux/guid_creator.cc,
src/tools/solaris/dump_syms/testdata/dump_syms_regtest.cc,
src/tools/windows/dump_syms/testdata/dump_syms_regtest.cc,
src/common/stabs_reader.h, and src/common/linux/breakpad_getcontext.h.

BUG=google-breakpad:877
Fixed: google-breakpad:877
TEST=./configure && make && make check
TEST=Did the find/sed in ChromeOS's copy, ensured emerge-hana google-breakpad
worked and had fewer LFS violations.
TEST=Did the find/sed in Chrome's copy, ensured compiling hana, windows, linux, and
eve still worked (since Chrome doesn't use config.h)

Change-Id: I16cededbba0ea0c28e919b13243e35300999e799
Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/4289676
Reviewed-by: Mike Frysinger <vapier@chromium.org>
2023-02-27 19:31:32 +00:00

487 lines
No EOL
16 KiB
C++

// Copyright (c) 2022, Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// disassembler_objdump.cc: Disassembler that invokes objdump for disassembly.
//
// Author: Mark Brand
#ifdef HAVE_CONFIG_H
#include <config.h> // Must come first
#endif
#include "processor/disassembler_objdump.h"
#include <unistd.h>
#include <sys/wait.h>
#include <array>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <vector>
#include "common/linux/eintr_wrapper.h"
#include "common/linux/scoped_pipe.h"
#include "common/linux/scoped_tmpfile.h"
#include "processor/logging.h"
namespace google_breakpad {
namespace {
// Upper bound, in bytes, on how much memory is read at the instruction
// address and handed to objdump for disassembly.
constexpr size_t kMaxX86InstructionLength = 15;
// Returns true when `token` is an instruction prefix ("lock" or one of the
// "rep" variants) rather than the operation mnemonic itself. The comparison
// is exact and case-sensitive.
bool IsInstructionPrefix(const string& token) {
  static const char* const kPrefixes[] = {"lock", "rep", "repz", "repnz"};
  for (const char* prefix : kPrefixes) {
    if (token == prefix) {
      return true;
    }
  }
  return false;
}
// Returns true when `token` is one of the operand-size keywords that objdump
// prints in front of a memory operand in Intel syntax (for example the
// "DWORD" and "PTR" in "DWORD PTR [esi+0x10]"). The comparison is exact and
// case-sensitive.
bool IsOperandSize(const string& token) {
  static const char* const kSizeKeywords[] = {"BYTE", "WORD", "DWORD", "QWORD",
                                              "PTR"};
  for (const char* keyword : kSizeKeywords) {
    if (token == keyword) {
      return true;
    }
  }
  return false;
}
// Looks up the x86 segment register named by `segment_name` ("ds", "es", "fs"
// or "gs") in `context` and stores its recorded value into `address`.
//
// Returns true on success. For any other name an error is logged, `address`
// is left untouched and false is returned.
bool GetSegmentAddressX86(const DumpContext& context, string segment_name,
                          uint64_t& address) {
  if (segment_name == "ds") {
    address = context.GetContextX86()->ds;
    return true;
  }
  if (segment_name == "es") {
    address = context.GetContextX86()->es;
    return true;
  }
  if (segment_name == "fs") {
    address = context.GetContextX86()->fs;
    return true;
  }
  if (segment_name == "gs") {
    address = context.GetContextX86()->gs;
    return true;
  }

  BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
  return false;
}
// Resolves the segment named by `segment_name` for an AMD64 context.
//
// Only "ds" and "es" are accepted, and both resolve to address 0; `context`
// is not consulted. Any other name logs an error, leaves `address` untouched
// and returns false.
// NOTE(review): presumably 0 reflects the flat address space used for these
// segments in 64-bit mode - confirm.
bool GetSegmentAddressAMD64(const DumpContext& context, string segment_name,
                            uint64_t& address) {
  const bool is_flat_segment = (segment_name == "ds" || segment_name == "es");
  if (!is_flat_segment) {
    BPLOG(ERROR) << "Unsupported segment register: " << segment_name;
    return false;
  }

  address = 0;
  return true;
}
// Dispatches to the architecture-specific segment lookup based on the CPU
// type reported by `context`, storing the result into `address`.
//
// Returns false (after logging an error) when the CPU is neither x86 nor
// AMD64, or when the architecture-specific lookup rejects `segment_name`.
bool GetSegmentAddress(const DumpContext& context, string segment_name,
                       uint64_t& address) {
  if (context.GetContextCPU() == MD_CONTEXT_X86) {
    return GetSegmentAddressX86(context, segment_name, address);
  }
  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
    return GetSegmentAddressAMD64(context, segment_name, address);
  }

  BPLOG(ERROR) << "Unsupported architecture for GetSegmentAddress\n";
  return false;
}
// Reads the full-width x86 register named by `register_name` from `context`
// into `value`.
//
// Recognised names: eax, ebx, ecx, edx, edi, esi, ebp, esp and eip. Any other
// name logs an error, leaves `value` untouched and returns false.
bool GetRegisterValueX86(const DumpContext& context, string register_name,
                         uint64_t& value) {
  if (register_name == "eax") {
    value = context.GetContextX86()->eax;
    return true;
  }
  if (register_name == "ebx") {
    value = context.GetContextX86()->ebx;
    return true;
  }
  if (register_name == "ecx") {
    value = context.GetContextX86()->ecx;
    return true;
  }
  if (register_name == "edx") {
    value = context.GetContextX86()->edx;
    return true;
  }
  if (register_name == "edi") {
    value = context.GetContextX86()->edi;
    return true;
  }
  if (register_name == "esi") {
    value = context.GetContextX86()->esi;
    return true;
  }
  if (register_name == "ebp") {
    value = context.GetContextX86()->ebp;
    return true;
  }
  if (register_name == "esp") {
    value = context.GetContextX86()->esp;
    return true;
  }
  if (register_name == "eip") {
    value = context.GetContextX86()->eip;
    return true;
  }

  BPLOG(ERROR) << "Unsupported register: " << register_name;
  return false;
}
// Reads the full-width AMD64 register named by `register_name` from `context`
// into `value`.
//
// Recognised names: rax, rbx, rcx, rdx, rdi, rsi, rbp, rsp, r8-r15 and rip.
// Any other name logs an error, leaves `value` untouched and returns false.
bool GetRegisterValueAMD64(const DumpContext& context, string register_name,
                           uint64_t& value) {
  if (register_name == "rax") {
    value = context.GetContextAMD64()->rax;
    return true;
  }
  if (register_name == "rbx") {
    value = context.GetContextAMD64()->rbx;
    return true;
  }
  if (register_name == "rcx") {
    value = context.GetContextAMD64()->rcx;
    return true;
  }
  if (register_name == "rdx") {
    value = context.GetContextAMD64()->rdx;
    return true;
  }
  if (register_name == "rdi") {
    value = context.GetContextAMD64()->rdi;
    return true;
  }
  if (register_name == "rsi") {
    value = context.GetContextAMD64()->rsi;
    return true;
  }
  if (register_name == "rbp") {
    value = context.GetContextAMD64()->rbp;
    return true;
  }
  if (register_name == "rsp") {
    value = context.GetContextAMD64()->rsp;
    return true;
  }
  if (register_name == "r8") {
    value = context.GetContextAMD64()->r8;
    return true;
  }
  if (register_name == "r9") {
    value = context.GetContextAMD64()->r9;
    return true;
  }
  if (register_name == "r10") {
    value = context.GetContextAMD64()->r10;
    return true;
  }
  if (register_name == "r11") {
    value = context.GetContextAMD64()->r11;
    return true;
  }
  if (register_name == "r12") {
    value = context.GetContextAMD64()->r12;
    return true;
  }
  if (register_name == "r13") {
    value = context.GetContextAMD64()->r13;
    return true;
  }
  if (register_name == "r14") {
    value = context.GetContextAMD64()->r14;
    return true;
  }
  if (register_name == "r15") {
    value = context.GetContextAMD64()->r15;
    return true;
  }
  if (register_name == "rip") {
    value = context.GetContextAMD64()->rip;
    return true;
  }

  BPLOG(ERROR) << "Unsupported register: " << register_name;
  return false;
}
// Fetches the register called `register_name` from `context` and writes it to
// `value`, returning true on success.
//
// The lookup is delegated to the x86 or AMD64 helper depending on the CPU
// type of `context`; any other CPU type logs an error and returns false.
// Only full-size registers are handled: partial registers are not needed
// because this is used solely to evaluate address expressions.
bool GetRegisterValue(const DumpContext& context, string register_name,
                      uint64_t& value) {
  if (context.GetContextCPU() == MD_CONTEXT_X86) {
    return GetRegisterValueX86(context, register_name, value);
  }
  if (context.GetContextCPU() == MD_CONTEXT_AMD64) {
    return GetRegisterValueAMD64(context, register_name, value);
  }

  BPLOG(ERROR) << "Unsupported architecture for GetRegisterValue\n";
  return false;
}
} // namespace
// static
bool DisassemblerObjdump::DisassembleInstruction(uint32_t cpu,
const uint8_t* raw_bytes,
unsigned int raw_bytes_len,
string& instruction) {
// Always initialize outputs
instruction = "";
if (!raw_bytes || raw_bytes_len == 0) {
// There's no need to perform any operation in this case, as there's
// clearly no instruction there.
return false;
}
string architecture;
if (cpu == MD_CONTEXT_X86) {
architecture = "i386";
} else if (cpu == MD_CONTEXT_AMD64) {
architecture = "i386:x86-64";
} else {
BPLOG(ERROR) << "Unsupported architecture.";
return false;
}
// Create a temporary file for the raw instruction bytes to pass to
// objdump, and write the bytes to the input file.
ScopedTmpFile raw_bytes_file;
if (!raw_bytes_file.InitData(raw_bytes, raw_bytes_len)) {
BPLOG(ERROR) << "Failed creating temporary file.";
return false;
}
// Create a pipe to use to read the disassembly back from objdump.
ScopedPipe disassembly_pipe;
if (!disassembly_pipe.Init()) {
BPLOG(ERROR) << "Failed creating pipe for output.";
return false;
}
pid_t child_pid = fork();
if (child_pid < 0) {
BPLOG(ERROR) << "Fork failed.";
return false;
}
if (child_pid == 0) {
// In the child process, set up the input and output file descriptors.
if (dup2(raw_bytes_file.GetFd(), STDIN_FILENO) < 0 ||
disassembly_pipe.Dup2WriteFd(STDOUT_FILENO) < 0 ||
disassembly_pipe.Dup2WriteFd(STDERR_FILENO) < 0) {
BPLOG(ERROR) << "Failed dup'ing file descriptors.";
exit(-1);
}
// We need to close the read end of the pipe in the child process so that
// when the parent closes it, the pipe is disconnected.
disassembly_pipe.CloseReadFd();
// We use "/proc/self/fd/0" here to allow objdump to parse an unnamed file,
// since objdump does not have a mode to read from stdin. This cannot be
// used with a pipe, since objdump requires that the input is a standard
// file.
execlp("objdump", "objdump", "-D", "--no-show-raw-insn", "-b", "binary",
"-M", "intel", "-m", architecture.c_str(), "/proc/self/fd/0",
nullptr);
BPLOG(ERROR) << "Failed to exec objdump.";
exit(-1);
} else {
// In the parent process, parse the objdump output.
// Match the instruction line, from:
// 0: lock cmpxchg DWORD PTR [esi+0x10],eax
// extract the string "lock cmpxchg DWORD PTR [esi+0x10],eax"
std::regex instruction_regex(
"^\\s+[0-9a-f]+:\\s+" // " 0:"
"((?:\\s*\\S*)+)$"); // "lock cmpxchg..."
std::string line;
std::smatch match;
while (disassembly_pipe.ReadLine(line)) {
if (std::regex_match(line, match, instruction_regex)) {
instruction = match[1].str();
break;
}
}
// Close the read pipe so that objdump will exit (in case we broke out of
// the loop above before reading all of the output).
disassembly_pipe.CloseReadFd();
// Now wait for objdump to exit.
int status = 0;
HANDLE_EINTR(waitpid(child_pid, &status, 0));
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
BPLOG(ERROR) << "objdump didn't run successfully.";
return false;
}
if (instruction == "") {
BPLOG(ERROR) << "Failed to find instruction in objdump output.";
return false;
}
}
return true;
}
// static
bool DisassemblerObjdump::TokenizeInstruction(const string& instruction,
string& operation, string& dest,
string& src) {
// Always initialize outputs.
operation = "";
dest = "";
src = "";
// Split the instruction into tokens by either whitespace or comma.
std::regex token_regex("((?:[^\\s,]+)|,)(?:\\s)*");
std::sregex_iterator tokens_begin(instruction.begin(), instruction.end(),
token_regex);
bool found_comma = false;
for (auto tokens_iter = tokens_begin; tokens_iter != std::sregex_iterator();
++tokens_iter) {
auto token = (*tokens_iter)[1].str();
if (operation.size() == 0) {
if (IsInstructionPrefix(token))
continue;
operation = token;
} else if (dest.size() == 0) {
if (IsOperandSize(token))
continue;
dest = token;
} else if (!found_comma) {
if (token == ",") {
found_comma = true;
} else {
BPLOG(ERROR) << "Failed to parse operands from objdump output, expected"
" comma but found \""
<< token << "\"";
return false;
}
} else if (src.size() == 0) {
if (IsOperandSize(token))
continue;
src = token;
} else {
if (token == ",") {
BPLOG(ERROR) << "Failed to parse operands from objdump output, found "
"unexpected comma after last operand.";
return false;
} else {
// We just ignore other junk after the last operand unless it's a
// comma, which would indicate we're probably still in the middle
// of the operands and something has gone wrong
}
}
}
if (found_comma && src.size() == 0) {
BPLOG(ERROR) << "Failed to parse operands from objdump output, found comma "
"but no src operand.";
return false;
}
return true;
}
// static
// Evaluates a memory-operand expression such as "fs:[esi+edi*4-0x80]" using
// the register values in `context` and stores the result into `address`.
//
// The accepted form is an optional "<seg>:" prefix followed by
// "[base(+index*stride)?([+-]0xoffset)?]". The result is
//   segment + base + index * stride +/- offset
// computed in unsigned 64-bit arithmetic.
//
// `address` is set to 0 on entry. Returns false when `expression` does not
// have the accepted form or when a segment or register name cannot be
// resolved for the CPU type of `context`.
bool DisassemblerObjdump::CalculateAddress(const DumpContext& context,
                                           const string& expression,
                                           uint64_t& address) {
  address = 0;

  // Extract the components of the expression.
  // fs:[esi+edi*4-0x80] -> ["fs", "esi", "edi", "4", "-", "0x80"]
  std::regex expression_regex(
      "^(?:(\\ws):)?"                // "fs:"
      "\\[(\\w+)"                    // "[esi"
      "(?:\\+(\\w+)(?:\\*(\\d+)))?"  // "+edi*4"
      "(?:([\\+-])(0x[0-9a-f]+))?"   // "-0x80"
      "\\]$");                       // "]"

  std::smatch parts;
  const bool matched = std::regex_match(expression, parts, expression_regex);
  if (!matched || parts.size() != 7) {
    return false;
  }

  const string segment_name = parts[1].str();
  const string base_name = parts[2].str();
  const string index_name = parts[3].str();
  const string stride_text = parts[4].str();
  const string offset_sign = parts[5].str();
  const string offset_text = parts[6].str();

  // Resolve the named components first; each lookup logs its own error.
  uint64_t segment_address = 0;
  if (!segment_name.empty() &&
      !GetSegmentAddress(context, segment_name, segment_address)) {
    return false;
  }

  uint64_t base_value = 0;
  if (!GetRegisterValue(context, base_name, base_value)) {
    return false;
  }

  uint64_t index_value = 0;
  if (!index_name.empty() &&
      !GetRegisterValue(context, index_name, index_value)) {
    return false;
  }

  // Absent numeric components fall back to neutral values.
  const uint64_t stride =
      stride_text.empty() ? 1 : strtoull(stride_text.c_str(), nullptr, 0);
  const uint64_t offset =
      offset_text.empty() ? 0 : strtoull(offset_text.c_str(), nullptr, 0);

  uint64_t result = segment_address + base_value + (index_value * stride);
  if (offset_sign == "+") {
    result += offset;
  } else if (offset_sign == "-") {
    result -= offset;
  }

  address = result;
  return true;
}
// Disassembles the instruction located at `address` inside `memory_region`
// for the given `cpu` type and records its operation and operands in
// operation_, dest_ and src_.
//
// Up to kMaxX86InstructionLength bytes are read starting at `address`; only
// the bytes that could actually be read are handed to objdump, so a region
// that ends mid-instruction is never padded with made-up zero bytes.
//
// The constructor cannot report failure. If `memory_region` is null,
// `address` lies outside the region, no byte can be read, or disassembly
// fails, it returns early without a successfully parsed instruction.
DisassemblerObjdump::DisassemblerObjdump(const uint32_t cpu,
                                         const MemoryRegion* memory_region,
                                         uint64_t address) {
  if (!memory_region) {
    return;
  }

  // Written as a subtraction so that a region ending at the very top of the
  // address space cannot wrap `base + size` around to a small value.
  const uint64_t region_base = memory_region->GetBase();
  if (address < region_base ||
      address - region_base >= memory_region->GetSize()) {
    return;
  }

  uint8_t ip_bytes[kMaxX86InstructionLength] = {0};
  size_t ip_bytes_length;
  for (ip_bytes_length = 0; ip_bytes_length < kMaxX86InstructionLength;
       ++ip_bytes_length) {
    // We have to read byte-by-byte here, since we still want to try and
    // disassemble an instruction even if we don't have enough bytes.
    if (!memory_region->GetMemoryAtAddress(address + ip_bytes_length,
                                           &ip_bytes[ip_bytes_length])) {
      break;
    }
  }

  // Pass the number of bytes actually read, not the buffer capacity.
  // DisassembleInstruction rejects a zero length, which covers the case
  // where not even the first byte was readable.
  string instruction;
  if (!DisassembleInstruction(cpu, ip_bytes,
                              static_cast<unsigned int>(ip_bytes_length),
                              instruction)) {
    return;
  }

  if (!TokenizeInstruction(instruction, operation_, dest_, src_)) {
    return;
  }
}
// Evaluates the source operand recorded by the constructor (src_) as an
// address expression, using the register values in `context`, and stores the
// result into `address`.
//
// Returns whatever CalculateAddress returns: false when src_ is not a
// supported memory-operand expression or a register/segment lookup fails.
bool DisassemblerObjdump::CalculateSrcAddress(const DumpContext& context,
uint64_t& address) {
return CalculateAddress(context, src_, address);
}
// Evaluates the destination operand recorded by the constructor (dest_) as an
// address expression, using the register values in `context`, and stores the
// result into `address`.
//
// Returns whatever CalculateAddress returns: false when dest_ is not a
// supported memory-operand expression or a register/segment lookup fails.
bool DisassemblerObjdump::CalculateDestAddress(const DumpContext& context,
uint64_t& address) {
return CalculateAddress(context, dest_, address);
}
} // namespace google_breakpad