Port x64 backend to xbyak

This commit is contained in:
MerryMage 2016-08-24 20:07:08 +01:00
parent 611cffb612
commit e32812cd00
25 changed files with 1638 additions and 5323 deletions

View file

@ -1,190 +0,0 @@
// This file is under the public domain.
#pragma once
#include <cstddef>
#ifdef _WIN32
#include <intrin.h>
#endif
#include <initializer_list>
#include <new>
#include <type_traits>
#include "common/common_types.h"
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
namespace Common {
// Helper functions:
#ifdef _WIN32
template <typename T>
static inline int CountSetBits(T v)
{
// from https://graphics.stanford.edu/~seander/bithacks.html
// GCC has this built in, but MSVC's intrinsic will only emit the actual
// POPCNT instruction, which we're not depending on
v = v - ((v >> 1) & (T)~(T)0/3);
v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);
v = (v + (v >> 4)) & (T)~(T)0/255*15;
return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * 8;
}
static inline int LeastSignificantSetBit(uint8_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint16_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint32_t val)
{
unsigned long index;
_BitScanForward(&index, val);
return (int)index;
}
static inline int LeastSignificantSetBit(uint64_t val)
{
unsigned long index;
_BitScanForward64(&index, val);
return (int)index;
}
#else
static inline int CountSetBits(uint8_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint16_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint32_t val) { return __builtin_popcount(val); }
static inline int CountSetBits(uint64_t val) { return __builtin_popcountll(val); }
static inline int LeastSignificantSetBit(uint8_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint16_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint32_t val) { return __builtin_ctz(val); }
static inline int LeastSignificantSetBit(uint64_t val) { return __builtin_ctzll(val); }
#endif
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
// using the set bits of an integer to represent a set of integers. Like that
// class, it acts like an array of bools:
// BitSet32 bs;
// bs[1] = true;
// but also like the underlying integer ([0] = least significant bit):
// BitSet32 bs2 = ...;
// bs = (bs ^ bs2) & BitSet32(0xffff);
// The following additional functionality is provided:
// - Construction using an initializer list.
// BitSet bs { 1, 2, 4, 8 };
// - Efficiently iterating through the set bits:
// for (int i : bs)
// [i is the *index* of a set bit]
// (This uses the appropriate CPU instruction to find the next set bit in one
// operation.)
// - Counting set bits using .Count() - see comment on that method.
// TODO: use constexpr when MSVC gets out of the Dark Ages
template <typename IntTy>
class BitSet
{
static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
public:
// A reference to a particular bit, returned from operator[].
class Ref
{
public:
Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
operator bool() const { return (m_bs->m_val & m_mask) != 0; }
bool operator=(bool set)
{
m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
return set;
}
private:
BitSet* m_bs;
IntTy m_mask;
};
// A STL-like iterator is required to be able to use range-based for loops.
class Iterator
{
public:
Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
int operator*() { return m_bit; }
Iterator& operator++()
{
if (m_val == 0)
{
m_bit = -1;
}
else
{
int bit = LeastSignificantSetBit(m_val);
m_val &= ~(1 << bit);
m_bit = bit;
}
return *this;
}
Iterator operator++(int _)
{
Iterator other(*this);
++*this;
return other;
}
bool operator==(Iterator other) const { return m_bit == other.m_bit; }
bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
private:
IntTy m_val;
int m_bit;
};
BitSet() : m_val(0) {}
explicit BitSet(IntTy val) : m_val(val) {}
BitSet(std::initializer_list<int> init)
{
m_val = 0;
for (int bit : init)
m_val |= (IntTy)1 << bit;
}
static BitSet AllTrue(size_t count)
{
return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
}
Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
bool operator==(BitSet other) const { return m_val == other.m_val; }
bool operator!=(BitSet other) const { return m_val != other.m_val; }
bool operator<(BitSet other) const { return m_val < other.m_val; }
bool operator>(BitSet other) const { return m_val > other.m_val; }
BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
BitSet operator~() const { return BitSet(~m_val); }
BitSet& operator|=(BitSet other) { return *this = *this | other; }
BitSet& operator&=(BitSet other) { return *this = *this & other; }
BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
operator uint32_t() = delete;
operator bool() { return m_val != 0; }
// Warning: Even though on modern CPUs this is a single fast instruction,
// Dolphin's official builds do not currently assume POPCNT support on x86,
// so slower explicit bit twiddling is generated. Still should generally
// be faster than a loop.
unsigned int Count() const { return CountSetBits(m_val); }
Iterator begin() const { Iterator it(m_val, 0); return ++it; }
Iterator end() const { return Iterator(m_val, -1); }
IntTy m_val;
};
} // Common
typedef Common::BitSet<uint8_t> BitSet8;
typedef Common::BitSet<uint16_t> BitSet16;
typedef Common::BitSet<uint32_t> BitSet32;
typedef Common::BitSet<uint64_t> BitSet64;

View file

@ -1,92 +0,0 @@
// Copyright 2013 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include "common/common_types.h"
#include "common/memory_util.h"
// Everything that needs to generate code should inherit from this.
// You get memory management for free, plus, you can use all emitter functions without
// having to prefix them with gen-> or something similar.
// Example implementation:
// class JIT : public CodeBlock<ARMXEmitter> {}
template<class T> class CodeBlock : public T
{
private:
// A privately used function to set the executable RAM space to something invalid.
// For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
virtual void PoisonMemory() = 0;
protected:
u8 *region;
size_t region_size;
public:
CodeBlock() : region(nullptr), region_size(0) {}
virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
CodeBlock(const CodeBlock&) = delete;
CodeBlock& operator=(const CodeBlock&) = delete;
// Call this before you generate any code.
void AllocCodeSpace(int size)
{
region_size = size;
region = (u8*)AllocateExecutableMemory(region_size);
T::SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger.
void ClearCodeSpace()
{
PoisonMemory();
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
void FreeCodeSpace()
{
#ifdef __SYMBIAN32__
ResetExecutableMemory(region);
#else
FreeMemoryPages(region, region_size);
#endif
region = nullptr;
region_size = 0;
}
bool IsInSpace(const u8 *ptr)
{
return (ptr >= region) && (ptr < (region + region_size));
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
void WriteProtect()
{
WriteProtectMemory(region, region_size, true);
}
void ResetCodePtr()
{
T::SetCodePtr(region);
}
size_t GetSpaceLeft() const
{
return region_size - (T::GetCodePtr() - region);
}
u8 *GetBasePtr() {
return region;
}
size_t GetOffset(const u8 *ptr) const {
return ptr - region;
}
};

View file

@ -0,0 +1,39 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* This software may be used and distributed according to the terms of the GNU
* General Public License version 2 or any later version.
*/
#pragma once
#include <iterator>
namespace Dynarmic {
namespace Common {
namespace detail {
template<typename T>
struct ReverseAdapter {
T& iterable;
auto begin() {
using namespace std;
return rbegin(iterable);
}
auto end() {
using namespace std;
return rend(iterable);
}
};
} // namespace detail
template<typename T>
detail::ReverseAdapter<T> Reverse(T&& iterable) {
return detail::ReverseAdapter<T>{iterable};
}
} // namespace Common
} // namespace Dynarmic

View file

@ -1,192 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/memory_util.h"
#ifdef _WIN32
#include <windows.h>
#include <psapi.h>
#include "common/string_util.h"
#else
#include <cstdlib>
#include <cstring>
#include <sys/mman.h>
#endif
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
#include <unistd.h>
#define PAGE_MASK (getpagesize() - 1)
#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
#endif
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
const char* GetLastErrorMsg()
{
static const size_t buff_size = 255;
#ifdef _WIN32
static thread_local char err_str[buff_size] = {};
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
err_str, buff_size, nullptr);
#else
static __thread char err_str[buff_size] = {};
// Thread safe (XSI-compliant)
strerror_r(errno, err_str, buff_size);
#endif
return err_str;
}
// This is purposely not a full wrapper for virtualalloc/mmap, but it
// provides exactly the primitive operations that Dolphin needs.
void* AllocateExecutableMemory(size_t size, bool low)
{
#if defined(_WIN32)
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
static char* map_hint = nullptr;
#if defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
// This OS has no flag to enforce allocation below the 4 GB boundary,
// but if we hint that we want a low address it is very likely we will
// get one.
// An older version of this code used MAP_FIXED, but that has the side
// effect of discarding already mapped pages that happen to be in the
// requested virtual memory range (such as the emulated RAM, sometimes).
if (low && (!map_hint))
map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
#endif
void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_ANON | MAP_PRIVATE
#if defined(ARCHITECTURE_X64) && defined(MAP_32BIT)
| (low ? MAP_32BIT : 0)
#endif
, -1, 0);
#endif /* defined(_WIN32) */
#ifdef _WIN32
if (ptr == nullptr)
{
#else
if (ptr == MAP_FAILED)
{
ptr = nullptr;
#endif
ASSERT_MSG(false, "Failed to allocate executable memory");
}
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
else
{
if (low)
{
map_hint += size;
map_hint = (char*)round_page(map_hint); /* round up to the next page */
}
}
#endif
#if EMU_ARCH_BITS == 64
if ((u64)ptr >= 0x80000000 && low == true)
ASSERT_MSG(false, "Executable memory ended up above 2GB!");
#endif
return ptr;
}
void* AllocateMemoryPages(size_t size)
{
#ifdef _WIN32
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
#else
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED)
ptr = nullptr;
#endif
if (ptr == nullptr)
ASSERT_MSG(false, "Failed to allocate raw memory");
return ptr;
}
void* AllocateAlignedMemory(size_t size,size_t alignment)
{
#ifdef _WIN32
void* ptr = _aligned_malloc(size,alignment);
#else
void* ptr = nullptr;
#ifdef ANDROID
ptr = memalign(alignment, size);
#else
if (posix_memalign(&ptr, alignment, size) != 0)
ASSERT_MSG(false, "Failed to allocate aligned memory");
#endif
#endif
if (ptr == nullptr)
ASSERT_MSG(false, "Failed to allocate aligned memory");
return ptr;
}
void FreeMemoryPages(void* ptr, size_t size)
{
if (ptr)
{
#ifdef _WIN32
if (!VirtualFree(ptr, 0, MEM_RELEASE))
ASSERT_MSG(false, "FreeMemoryPages failed!\n%s", GetLastErrorMsg());
#else
munmap(ptr, size);
#endif
}
}
void FreeAlignedMemory(void* ptr)
{
if (ptr)
{
#ifdef _WIN32
_aligned_free(ptr);
#else
free(ptr);
#endif
}
}
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
ASSERT_MSG(false, "WriteProtectMemory failed!\n%s", GetLastErrorMsg());
#else
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ);
#endif
}
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
{
#ifdef _WIN32
DWORD oldValue;
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
ASSERT_MSG(false, "UnWriteProtectMemory failed!\n%s", GetLastErrorMsg());
#else
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ);
#endif
}
std::string MemUsage()
{
return "";
}

View file

@ -1,19 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cstddef>
#include <string>
void* AllocateExecutableMemory(size_t size, bool low = true);
void* AllocateMemoryPages(size_t size);
void FreeMemoryPages(void* ptr, size_t size);
void* AllocateAlignedMemory(size_t size,size_t alignment);
void FreeAlignedMemory(void* ptr);
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
std::string MemUsage();
inline int GetPageSize() { return 4096; }

View file

@ -1,363 +0,0 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "abi.h"
#include "emitter.h"
using namespace Gen;
// Shared code between Win64 and Unix64
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
size_t shadow = 0;
#if defined(_WIN32)
shadow = 0x20;
#endif
int count = (mask & ABI_ALL_GPRS).Count();
rsp_alignment -= count * 8;
size_t subtraction = 0;
int fpr_count = (mask & ABI_ALL_FPRS).Count();
if (fpr_count) {
// If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xf;
}
subtraction += 16 * fpr_count;
size_t xmm_base_subtraction = subtraction;
subtraction += needed_frame_size;
subtraction += shadow;
// Final alignment.
rsp_alignment -= subtraction;
subtraction += rsp_alignment & 0xf;
*shadowp = shadow;
*subtractionp = subtraction;
*xmm_offsetp = subtraction - xmm_base_subtraction;
}
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int r : mask & ABI_ALL_GPRS)
PUSH((X64Reg)r);
if (subtraction)
SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int x : mask & ABI_ALL_FPRS) {
MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16));
xmm_offset += 16;
}
return shadow;
}
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
size_t shadow, subtraction, xmm_offset;
ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset);
for (int x : mask & ABI_ALL_FPRS) {
MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset));
xmm_offset += 16;
}
if (subtraction)
ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction));
for (int r = 15; r >= 0; r--) {
if (mask[r])
POP((X64Reg)r);
}
}
// Common functions
void XEmitter::ABI_CallFunction(const void *func) {
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), ImmPtr(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
MOV(64, R(ABI_PARAM4), ImmPtr(param4));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
if (!arg3.IsSimpleReg(ABI_PARAM3))
MOV(32, R(ABI_PARAM3), arg3);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
MOV(64, R(ABI_PARAM2), ImmPtr(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
// Pass a register as a parameter.
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
// Pass two registers as parameters.
void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
if (reg2 != ABI_PARAM1) {
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
} else {
if (reg2 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg2));
if (reg1 != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg1));
}
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), Imm64(param3));
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(32, R(ABI_PARAM1), arg1);
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(32, R(ABI_PARAM2), arg2);
u64 distance = u64(func) - (u64(code) + 5);
if (distance >= 0x0000000080000000ULL
&& distance < 0xFFFFFFFF80000000ULL) {
// Far call
MOV(64, R(RAX), ImmPtr(func));
CALLptr(R(RAX));
} else {
CALL(func);
}
}

View file

@ -1,59 +0,0 @@
// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
#include "common/bit_set.h"
#include "emitter.h"
// x64 ABI:s, and helpers to help follow them when JIT-ing code.
// All convensions return values in EAX (+ possibly EDX).
// Windows 64-bit
// * 4-reg "fastcall" variant, very new-skool stack handling
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
// Scratch: RAX RCX RDX R8 R9 R10 R11
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
// Parameters: RCX RDX R8 R9, further MOV-ed
// Linux 64-bit
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
// Callee-save: RBX RBP R12 R13 R14 R15
// Parameters: RDI RSI RDX RCX R8 R9
#define ABI_ALL_FPRS BitSet32(0xffff0000)
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
#else //64-bit Unix / OS X
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED \
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
ABI_ALL_FPRS)
#endif // WIN32
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
#define ABI_RETURN RAX

View file

@ -1,197 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <string>
#include <thread>
#include "common/common_types.h"
#include "cpu_detect.h"
namespace Common {
#ifndef _MSC_VER
#ifdef __FreeBSD__
#include <sys/types.h>
#include <machine/cpufunc.h>
#endif
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#ifdef __FreeBSD__
// Despite the name, this is just do_cpuid() with ECX as second input.
cpuid_count((u_int)function_id, (u_int)subfunction_id, (u_int*)info);
#else
info[0] = function_id; // eax
info[2] = subfunction_id; // ecx
__asm__(
"cpuid"
: "=a" (info[0]),
"=b" (info[1]),
"=c" (info[2]),
"=d" (info[3])
: "a" (function_id),
"c" (subfunction_id)
);
#endif
}
static inline void __cpuid(int info[4], int function_id) {
return __cpuidex(info, function_id, 0);
}
#define _XCR_XFEATURE_ENABLED_MASK 0
static u64 _xgetbv(u32 index) {
u32 eax, edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
return ((u64)edx << 32) | eax;
}
#endif // ifndef _MSC_VER
// Detects the various CPU features
static CPUCaps Detect() {
CPUCaps caps = {};
caps.num_cores = std::thread::hardware_concurrency();
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// Citra at all anyway
int cpu_id[4];
memset(caps.brand_string, 0, sizeof(caps.brand_string));
// Detect CPU's CPUID capabilities and grab CPU string
__cpuid(cpu_id, 0x00000000);
u32 max_std_fn = cpu_id[0]; // EAX
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
if (!strcmp(caps.brand_string, "GenuineIntel"))
caps.vendor = CPUVendor::INTEL;
else if (!strcmp(caps.brand_string, "AuthenticAMD"))
caps.vendor = CPUVendor::AMD;
else
caps.vendor = CPUVendor::OTHER;
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4996)
#endif
// Set reasonable default brand string even if brand string not available
strncpy(caps.cpu_string, caps.brand_string, sizeof(caps.cpu_string));
caps.cpu_string[sizeof(caps.cpu_string) - 1] = '\0';
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// Detect family and other miscellaneous features
if (max_std_fn >= 1) {
__cpuid(cpu_id, 0x00000001);
if ((cpu_id[3] >> 25) & 1) caps.sse = true;
if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
if ((cpu_id[2]) & 1) caps.sse3 = true;
if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
if ((cpu_id[2] >> 25) & 1) caps.aes = true;
if ((cpu_id[3] >> 24) & 1) {
caps.fxsave_fxrstor = true;
}
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
// - XGETBV result has the XCR bit set.
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
caps.avx = true;
if ((cpu_id[2] >> 12) & 1)
caps.fma = true;
}
}
if (max_std_fn >= 7) {
__cpuidex(cpu_id, 0x00000007, 0x00000000);
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
if ((cpu_id[1] >> 5) & 1)
caps.avx2 = caps.avx;
if ((cpu_id[1] >> 3) & 1)
caps.bmi1 = true;
if ((cpu_id[1] >> 8) & 1)
caps.bmi2 = true;
}
}
caps.flush_to_zero = caps.sse;
if (max_ex_fn >= 0x80000004) {
// Extract CPU model string
__cpuid(cpu_id, 0x80000002);
std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000003);
std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
__cpuid(cpu_id, 0x80000004);
std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
}
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
}
return caps;
}
const CPUCaps& GetCPUCaps() {
static CPUCaps caps = Detect();
return caps;
}
std::string GetCPUCapsString() {
auto caps = GetCPUCaps();
std::string sum(caps.cpu_string);
sum += " (";
sum += caps.brand_string;
sum += ")";
if (caps.sse) sum += ", SSE";
if (caps.sse2) {
sum += ", SSE2";
if (!caps.flush_to_zero) sum += " (without DAZ)";
}
if (caps.sse3) sum += ", SSE3";
if (caps.ssse3) sum += ", SSSE3";
if (caps.sse4_1) sum += ", SSE4.1";
if (caps.sse4_2) sum += ", SSE4.2";
if (caps.avx) sum += ", AVX";
if (caps.avx2) sum += ", AVX2";
if (caps.bmi1) sum += ", BMI1";
if (caps.bmi2) sum += ", BMI2";
if (caps.fma) sum += ", FMA";
if (caps.aes) sum += ", AES";
if (caps.movbe) sum += ", MOVBE";
if (caps.long_mode) sum += ", 64-bit support";
return sum;
}
} // namespace Common

View file

@ -1,66 +0,0 @@
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string>
namespace Common {
/// x86/x64 CPU vendors that may be detected by this module
enum class CPUVendor {
INTEL,
AMD,
OTHER,
};
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
CPUVendor vendor;
char cpu_string[0x21];
char brand_string[0x41];
int num_cores;
bool sse;
bool sse2;
bool sse3;
bool ssse3;
bool sse4_1;
bool sse4_2;
bool lzcnt;
bool avx;
bool avx2;
bool bmi1;
bool bmi2;
bool fma;
bool fma4;
bool aes;
// Support for the FXSAVE and FXRSTOR instructions
bool fxsave_fxrstor;
bool movbe;
// This flag indicates that the hardware supports some mode in which denormal inputs and outputs
// are automatically set to (signed) zero.
bool flush_to_zero;
// Support for LAHF and SAHF instructions in 64-bit mode
bool lahf_sahf_64;
bool long_mode;
};
/**
* Gets the supported capabilities of the host CPU
* @return Reference to a CPUCaps struct with the detected host CPU capabilities
*/
const CPUCaps& GetCPUCaps();
/**
* Gets a string summary of the name and supported capabilities of the host CPU
* @return String summary
*/
std::string GetCPUCapsString();
} // namespace Common

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff