dynarmic/src/dynarmic/backend/arm64/a32_address_space.cpp

/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/arm64/a32_address_space.h"
#include "dynarmic/backend/arm64/a32_jitstate.h"
#include "dynarmic/backend/arm64/abi.h"
#include "dynarmic/backend/arm64/devirtualize.h"
#include "dynarmic/backend/arm64/emit_arm64.h"
#include "dynarmic/backend/arm64/stack_layout.h"
#include "dynarmic/common/cast_util.h"
#include "dynarmic/common/fp/fpcr.h"
#include "dynarmic/frontend/A32/a32_location_descriptor.h"
#include "dynarmic/frontend/A32/translate/a32_translate.h"
#include "dynarmic/interface/A32/config.h"
#include "dynarmic/interface/exclusive_monitor.h"
#include "dynarmic/ir/opt/passes.h"
namespace Dynarmic::Backend::Arm64 {
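
// Emits a small trampoline that forwards a call to a member function of `this_`:
// it loads the devirtualized `this` pointer into X0, loads the target address into
// Xscratch0, and branches there. Both pointers live in an inline literal pool placed
// directly after the branch. Any other arguments are assumed to already be in their
// AAPCS64 argument registers (X1 onwards) when the trampoline is entered.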
template<auto mfp, typename T>
static void* EmitCallTrampoline(oaknut::CodeGenerator& code, T* this_) {
using namespace oaknut::util;
const auto info = Devirtualize<mfp>(this_);
oaknut::Label l_addr, l_this;
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(info.this_ptr);
code.l(l_addr);
code.dx(info.fn_ptr);
return target;
}
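
// As above, but the trampoline targets a capture-less lambda that performs the read
// through the global exclusive monitor (ReadAndMark) before invoking the user's read
// callback, so exclusive loads are tracked per processor_id.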
template<auto callback, typename T>
static void* EmitExclusiveReadCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr) -> T {
return conf.global_monitor->ReadAndMark<T>(conf.processor_id, vaddr, [&]() -> T {
return (conf.callbacks->*callback)(vaddr);
});
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}
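
// As above, for exclusive stores: the lambda runs the user's exclusive-write callback
// under the global monitor's DoExclusiveOperation and returns 0 on success or 1 on
// failure, matching the STREX-style status value the emitted code stores to Rd.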
template<auto callback, typename T>
static void* EmitExclusiveWriteCallTrampoline(oaknut::CodeGenerator& code, const A32::UserConfig& conf) {
using namespace oaknut::util;
oaknut::Label l_addr, l_this;
auto fn = [](const A32::UserConfig& conf, A32::VAddr vaddr, T value) -> u32 {
return conf.global_monitor->DoExclusiveOperation<T>(conf.processor_id, vaddr,
[&](T expected) -> bool {
return (conf.callbacks->*callback)(vaddr, value, expected);
})
? 0
: 1;
};
void* target = code.ptr<void*>();
code.LDR(X0, l_this);
code.LDR(Xscratch0, l_addr);
code.BR(Xscratch0);
code.align(8);
code.l(l_this);
code.dx(mcl::bit_cast<u64>(&conf));
code.l(l_addr);
code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
return target;
}

A32AddressSpace::A32AddressSpace(const A32::UserConfig& conf)
        : AddressSpace(conf.code_cache_size)
        , conf(conf) {
    EmitPrelude();
}
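
// Translates a single basic block starting at `descriptor` into IR and runs the
// optimization passes enabled in the user configuration.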
IR::Block A32AddressSpace::GenerateIR(IR::LocationDescriptor descriptor) const {
    IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});

    Optimization::PolyfillPass(ir_block, {});
    if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
        Optimization::A32GetSetElimination(ir_block, {.convert_nzc_to_nz = true});
        Optimization::DeadCodeElimination(ir_block);
    }
    if (conf.HasOptimization(OptimizationFlag::ConstProp)) {
        Optimization::A32ConstantMemoryReads(ir_block, conf.callbacks);
        Optimization::ConstantPropagation(ir_block);
        Optimization::DeadCodeElimination(ir_block);
    }
    Optimization::VerificationPass(ir_block);

    return ir_block;
}
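
// Emits the prelude at the start of the code cache: the callback trampolines, the
// run_code/step_code entry stubs, the dispatcher return stub, and the epilogue that
// hands control back to the host.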
void A32AddressSpace::EmitPrelude() {
    using namespace oaknut::util;

    mem.unprotect();

    prelude_info.read_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead8>(code, conf.callbacks);
    prelude_info.read_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead16>(code, conf.callbacks);
    prelude_info.read_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead32>(code, conf.callbacks);
    prelude_info.read_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryRead64>(code, conf.callbacks);
    prelude_info.exclusive_read_memory_8 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead8, u8>(code, conf);
    prelude_info.exclusive_read_memory_16 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead16, u16>(code, conf);
    prelude_info.exclusive_read_memory_32 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead32, u32>(code, conf);
    prelude_info.exclusive_read_memory_64 = EmitExclusiveReadCallTrampoline<&A32::UserCallbacks::MemoryRead64, u64>(code, conf);
    prelude_info.write_memory_8 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite8>(code, conf.callbacks);
    prelude_info.write_memory_16 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite16>(code, conf.callbacks);
    prelude_info.write_memory_32 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite32>(code, conf.callbacks);
    prelude_info.write_memory_64 = EmitCallTrampoline<&A32::UserCallbacks::MemoryWrite64>(code, conf.callbacks);
    prelude_info.exclusive_write_memory_8 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive8, u8>(code, conf);
    prelude_info.exclusive_write_memory_16 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive16, u16>(code, conf);
    prelude_info.exclusive_write_memory_32 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive32, u32>(code, conf);
    prelude_info.exclusive_write_memory_64 = EmitExclusiveWriteCallTrampoline<&A32::UserCallbacks::MemoryWriteExclusive64, u64>(code, conf);
    prelude_info.call_svc = EmitCallTrampoline<&A32::UserCallbacks::CallSVC>(code, conf.callbacks);
    prelude_info.exception_raised = EmitCallTrampoline<&A32::UserCallbacks::ExceptionRaised>(code, conf.callbacks);
    prelude_info.isb_raised = EmitCallTrampoline<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(code, conf.callbacks);
    prelude_info.add_ticks = EmitCallTrampoline<&A32::UserCallbacks::AddTicks>(code, conf.callbacks);
    prelude_info.get_ticks_remaining = EmitCallTrampoline<&A32::UserCallbacks::GetTicksRemaining>(code, conf.callbacks);

    oaknut::Label return_from_run_code;
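
    // run_code: entered from the host. Saves callee-saved registers, loads the
    // remaining cycle budget (if cycle counting is enabled), saves the host FPCR and
    // installs the guest FPCR held in the upper half of upper_location_descriptor,
    // then branches to the target block unless a halt has already been requested.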
    prelude_info.run_code = code.ptr<PreludeInfo::RunCodeFuncType>();
    {
        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));

        code.MOV(X19, X0);
        code.MOV(Xstate, X1);
        code.MOV(Xhalt, X2);
        if (conf.enable_cycle_counting) {
            code.BL(prelude_info.get_ticks_remaining);
            code.MOV(Xticks, X0);
            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
        }

        code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
        code.AND(Wscratch0, Wscratch0, 0xffff0000);
        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);

        code.LDAR(Wscratch0, Xhalt);
        code.CBNZ(Wscratch0, return_from_run_code);

        code.BR(X19);
    }
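
    // step_code: like run_code, but runs with a budget of a single tick and atomically
    // sets HaltReason::Step in the halt word so that control returns to the host once
    // the stepped block has executed.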
    prelude_info.step_code = code.ptr<PreludeInfo::RunCodeFuncType>();
    {
        ABI_PushRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));

        code.MOV(X19, X0);
        code.MOV(Xstate, X1);
        code.MOV(Xhalt, X2);
        if (conf.enable_cycle_counting) {
            code.MOV(Xticks, 1);
            code.STR(Xticks, SP, offsetof(StackLayout, cycles_to_run));
        }

        code.LDR(Wscratch0, Xstate, offsetof(A32JitState, upper_location_descriptor));
        code.AND(Wscratch0, Wscratch0, 0xffff0000);
        code.MRS(Xscratch1, oaknut::SystemReg::FPCR);
        code.STR(Wscratch1, SP, offsetof(StackLayout, save_host_fpcr));
        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);

        oaknut::Label step_hr_loop;
        code.l(step_hr_loop);
        code.LDAXR(Wscratch0, Xhalt);
        code.CBNZ(Wscratch0, return_from_run_code);
        code.ORR(Wscratch0, Wscratch0, static_cast<u32>(HaltReason::Step));
        code.STLXR(Wscratch1, Wscratch0, Xhalt);
        code.CBNZ(Wscratch1, step_hr_loop);

        code.BR(X19);
    }
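
    // return_to_dispatcher: reached between blocks. Returns to the host if a halt was
    // requested or the cycle budget is exhausted; otherwise calls back into
    // A32AddressSpace::GetOrEmit to look up (or compile) the next block and branches to it.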
    prelude_info.return_to_dispatcher = code.ptr<void*>();
    {
        oaknut::Label l_this, l_addr;

        code.LDAR(Wscratch0, Xhalt);
        code.CBNZ(Wscratch0, return_from_run_code);
        if (conf.enable_cycle_counting) {
            code.CMP(Xticks, 0);
            code.B(LE, return_from_run_code);
        }

        code.LDR(X0, l_this);
        code.MOV(X1, Xstate);
        code.LDR(Xscratch0, l_addr);
        code.BLR(Xscratch0);
        code.BR(X0);

        const auto fn = [](A32AddressSpace& self, A32JitState& context) -> CodePtr {
            return self.GetOrEmit(context.GetLocationDescriptor());
        };

        code.align(8);
        code.l(l_this);
        code.dx(mcl::bit_cast<u64>(this));
        code.l(l_addr);
        code.dx(mcl::bit_cast<u64>(Common::FptrCast(fn)));
    }
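
    // return_from_run_code: reports the ticks actually spent via AddTicks, restores the
    // host FPCR, atomically reads and clears the halt reason (returned to the caller in
    // W0), then restores callee-saved registers and returns to the host.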
    prelude_info.return_from_run_code = code.ptr<void*>();
    {
        code.l(return_from_run_code);

        if (conf.enable_cycle_counting) {
            code.LDR(X1, SP, offsetof(StackLayout, cycles_to_run));
            code.SUB(X1, X1, Xticks);
            code.BL(prelude_info.add_ticks);
        }

        code.LDR(Wscratch0, SP, offsetof(StackLayout, save_host_fpcr));
        code.MSR(oaknut::SystemReg::FPCR, Xscratch0);

        oaknut::Label exit_hr_loop;
        code.l(exit_hr_loop);
        code.LDAXR(W0, Xhalt);
        code.STLXR(Wscratch0, WZR, Xhalt);
        code.CBNZ(Wscratch0, exit_hr_loop);

        ABI_PopRegisters(code, ABI_CALLEE_SAVE | (1 << 30), sizeof(StackLayout));
        code.RET();
    }

    prelude_info.end_of_prelude = code.ptr<u32*>();

    mem.invalidate_all();
    mem.protect();
}
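
// Describes the A32-specific emitter configuration: a 32-bit guest address space, the
// page-table and misalignment-detection settings from the user configuration, and how
// to derive the FPCR for a block from its location descriptor's FPSCR bits.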
EmitConfig A32AddressSpace::GetEmitConfig() {
    return EmitConfig{
        .optimizations = conf.unsafe_optimizations ? conf.optimizations : conf.optimizations & all_safe_optimizations,

        .hook_isb = conf.hook_isb,

        .cntfreq_el0{},
        .ctr_el0{},
        .dczid_el0{},
        .tpidrro_el0{},
        .tpidr_el0{},

        .page_table_pointer = mcl::bit_cast<u64>(conf.page_table),
        .page_table_address_space_bits = 32,
        .page_table_pointer_mask_bits = conf.page_table_pointer_mask_bits,
        .silently_mirror_page_table = true,
        .absolute_offset_page_table = conf.absolute_offset_page_table,
        .detect_misaligned_access_via_page_table = conf.detect_misaligned_access_via_page_table,
        .only_detect_misalignment_via_page_table_on_page_boundary = conf.only_detect_misalignment_via_page_table_on_page_boundary,

        .wall_clock_cntpct = conf.wall_clock_cntpct,
        .enable_cycle_counting = conf.enable_cycle_counting,

        .always_little_endian = conf.always_little_endian,

        .descriptor_to_fpcr = [](const IR::LocationDescriptor& location) { return FP::FPCR{A32::LocationDescriptor{location}.FPSCR().Value()}; },
        .emit_cond = EmitA32Cond,
        .emit_condition_failed_terminal = EmitA32ConditionFailedTerminal,
        .emit_terminal = EmitA32Terminal,

        .state_nzcv_offset = offsetof(A32JitState, cpsr_nzcv),
        .state_fpsr_offset = offsetof(A32JitState, fpsr),
        .state_exclusive_state_offset = offsetof(A32JitState, exclusive_state),

        .coprocessors = conf.coprocessors,
    };
}

} // namespace Dynarmic::Backend::Arm64