mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-01-01 20:24:36 +01:00
Port x64 backend to xbyak
This commit is contained in:
parent
611cffb612
commit
e32812cd00
25 changed files with 1638 additions and 5323 deletions
|
|
@ -1,190 +0,0 @@
|
|||
// This file is under the public domain.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include <initializer_list>
|
||||
#include <new>
|
||||
#include <type_traits>
|
||||
#include "common/common_types.h"
|
||||
|
||||
// namespace avoids conflict with OS X Carbon; don't use BitSet<T> directly
|
||||
namespace Common {
|
||||
|
||||
// Helper functions:
|
||||
|
||||
#ifdef _WIN32
|
||||
// Portable population count for any unsigned integer type T.
// Uses the parallel bit-summing technique from
// https://graphics.stanford.edu/~seander/bithacks.html rather than MSVC's
// popcount intrinsic, because that intrinsic always emits the POPCNT
// instruction, which this code does not want to depend on.
template <typename T>
static inline int CountSetBits(T v)
{
    // Repeating bit patterns sized to T: 0101..., 0011..., 00001111... and
    // 00000001 in every byte.
    const T every_other_bit = (T)~(T)0 / 3;
    const T every_other_pair = (T)~(T)0 / 15 * 3;
    const T low_nibbles = (T)~(T)0 / 255 * 15;
    const T byte_ones = (T)~(T)0 / 255;

    T acc = v;
    // Sum adjacent bits into 2-bit counters.
    acc = acc - ((acc >> 1) & every_other_bit);
    // Sum adjacent 2-bit counters into 4-bit counters.
    acc = (acc & every_other_pair) + ((acc >> 2) & every_other_pair);
    // Sum adjacent 4-bit counters into per-byte counters.
    acc = (acc + (acc >> 4)) & low_nibbles;
    // The multiply accumulates all byte counters into the top byte, which is
    // then shifted down to become the result.
    return (T)(acc * byte_ones) >> (sizeof(T) - 1) * 8;
}
|
||||
static inline int LeastSignificantSetBit(uint8_t val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(uint16_t val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(uint32_t val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
static inline int LeastSignificantSetBit(uint64_t val)
|
||||
{
|
||||
unsigned long index;
|
||||
_BitScanForward64(&index, val);
|
||||
return (int)index;
|
||||
}
|
||||
#else
|
||||
// Population count via the compiler builtins. The three narrow overloads
// share the unsigned-int builtin; the 64-bit overload needs the long long one.
static inline int CountSetBits(uint8_t val)
{
    return __builtin_popcount(val);
}

static inline int CountSetBits(uint16_t val)
{
    return __builtin_popcount(val);
}

static inline int CountSetBits(uint32_t val)
{
    return __builtin_popcount(val);
}

static inline int CountSetBits(uint64_t val)
{
    return __builtin_popcountll(val);
}
|
||||
// Index of the lowest set bit via the count-trailing-zeros builtins.
// Callers must pass a non-zero value: the builtins are undefined for 0.
static inline int LeastSignificantSetBit(uint8_t val)
{
    return __builtin_ctz(val);
}

static inline int LeastSignificantSetBit(uint16_t val)
{
    return __builtin_ctz(val);
}

static inline int LeastSignificantSetBit(uint32_t val)
{
    return __builtin_ctz(val);
}

static inline int LeastSignificantSetBit(uint64_t val)
{
    return __builtin_ctzll(val);
}
|
||||
#endif
|
||||
|
||||
// Similar to std::bitset, this is a class which encapsulates a bitset, i.e.
|
||||
// using the set bits of an integer to represent a set of integers. Like that
|
||||
// class, it acts like an array of bools:
|
||||
// BitSet32 bs;
|
||||
// bs[1] = true;
|
||||
// but also like the underlying integer ([0] = least significant bit):
|
||||
// BitSet32 bs2 = ...;
|
||||
// bs = (bs ^ bs2) & BitSet32(0xffff);
|
||||
// The following additional functionality is provided:
|
||||
// - Construction using an initializer list.
|
||||
// BitSet bs { 1, 2, 4, 8 };
|
||||
// - Efficiently iterating through the set bits:
|
||||
// for (int i : bs)
|
||||
// [i is the *index* of a set bit]
|
||||
// (This uses the appropriate CPU instruction to find the next set bit in one
|
||||
// operation.)
|
||||
// - Counting set bits using .Count() - see comment on that method.
|
||||
|
||||
// TODO: use constexpr when MSVC gets out of the Dark Ages
|
||||
|
||||
// A set of small integers stored as the bits of one unsigned integer of type
// IntTy; bit 0 is the least significant bit.
template <typename IntTy>
class BitSet
{
    static_assert(!std::is_signed<IntTy>::value, "BitSet should not be used with signed types");
public:
    // A reference to a particular bit, returned from operator[].
    // Holds a pointer to the owning BitSet plus a single-bit mask.
    class Ref
    {
    public:
        Ref(Ref&& other) : m_bs(other.m_bs), m_mask(other.m_mask) {}
        Ref(BitSet* bs, IntTy mask) : m_bs(bs), m_mask(mask) {}
        // Reads the referenced bit.
        operator bool() const { return (m_bs->m_val & m_mask) != 0; }
        // Writes the referenced bit and returns the value written.
        bool operator=(bool set)
        {
            m_bs->m_val = (m_bs->m_val & ~m_mask) | (set ? m_mask : 0);
            return set;
        }
    private:
        BitSet* m_bs;
        IntTy m_mask;
    };

    // A STL-like iterator is required to be able to use range-based for loops.
    // m_val holds the bits not yet visited; m_bit is the index currently
    // pointed at, or -1 once the iteration is finished.
    class Iterator
    {
    public:
        Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
        Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
        Iterator& operator=(Iterator other) { new (this) Iterator(other); return *this; }
        int operator*() { return m_bit; }
        // Advances to the next set bit, or to the end state (-1) if none remain.
        Iterator& operator++()
        {
            if (m_val == 0)
            {
                m_bit = -1;
            }
            else
            {
                int bit = LeastSignificantSetBit(m_val);
                // Shift a one of type IntTy, not an int literal: with a plain
                // `1 << bit` the shift is done in int, which is wrong (and
                // undefined) for bit indices of 31 and above in a 64-bit set.
                m_val &= ~((IntTy)1 << bit);
                m_bit = bit;
            }
            return *this;
        }
        Iterator operator++(int _)
        {
            Iterator other(*this);
            ++*this;
            return other;
        }
        // Iterators compare equal when they point at the same bit index; the
        // remaining-bits value is deliberately not part of the comparison.
        bool operator==(Iterator other) const { return m_bit == other.m_bit; }
        bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
    private:
        IntTy m_val;
        int m_bit;
    };

    // Empty set.
    BitSet() : m_val(0) {}
    // Set whose membership is given directly by the bits of val.
    explicit BitSet(IntTy val) : m_val(val) {}
    // Set containing exactly the listed bit indices.
    BitSet(std::initializer_list<int> init)
    {
        m_val = 0;
        for (int bit : init)
            m_val |= (IntTy)1 << bit;
    }

    // Set with the lowest `count` bits set. The full-width case is handled
    // separately because shifting by the type's width is not valid.
    static BitSet AllTrue(size_t count)
    {
        return BitSet(count == sizeof(IntTy)*8 ? ~(IntTy)0 : (((IntTy)1 << count) - 1));
    }

    Ref operator[](size_t bit) { return Ref(this, (IntTy)1 << bit); }
    const Ref operator[](size_t bit) const { return (*const_cast<BitSet*>(this))[bit]; }
    bool operator==(BitSet other) const { return m_val == other.m_val; }
    bool operator!=(BitSet other) const { return m_val != other.m_val; }
    bool operator<(BitSet other) const { return m_val < other.m_val; }
    bool operator>(BitSet other) const { return m_val > other.m_val; }
    BitSet operator|(BitSet other) const { return BitSet(m_val | other.m_val); }
    BitSet operator&(BitSet other) const { return BitSet(m_val & other.m_val); }
    BitSet operator^(BitSet other) const { return BitSet(m_val ^ other.m_val); }
    BitSet operator~() const { return BitSet(~m_val); }
    BitSet& operator|=(BitSet other) { return *this = *this | other; }
    BitSet& operator&=(BitSet other) { return *this = *this & other; }
    BitSet& operator^=(BitSet other) { return *this = *this ^ other; }
    // Conversion to an integer is forbidden; only the truth test is allowed.
    operator uint32_t() = delete;
    operator bool() { return m_val != 0; }

    // Warning: Even though on modern CPUs this is a single fast instruction,
    // Dolphin's official builds do not currently assume POPCNT support on x86,
    // so slower explicit bit twiddling is generated. Still should generally
    // be faster than a loop.
    unsigned int Count() const { return CountSetBits(m_val); }

    // begin() starts from a dummy position and advances once so that it
    // points at the first set bit (or at end() for an empty set).
    Iterator begin() const { Iterator it(m_val, 0); return ++it; }
    Iterator end() const { return Iterator(m_val, -1); }

    IntTy m_val;
};
|
||||
|
||||
} // Common
|
||||
|
||||
// Convenience aliases for the supported storage widths.
using BitSet8 = Common::BitSet<uint8_t>;
using BitSet16 = Common::BitSet<uint16_t>;
using BitSet32 = Common::BitSet<uint32_t>;
using BitSet64 = Common::BitSet<uint64_t>;
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/memory_util.h"
|
||||
|
||||
// Everything that needs to generate code should inherit from this.
|
||||
// You get memory management for free, plus, you can use all emitter functions without
|
||||
// having to prefix them with gen-> or something similar.
|
||||
// Example implementation:
|
||||
// class JIT : public CodeBlock<ARMXEmitter> {}
|
||||
template<class T> class CodeBlock : public T
|
||||
{
|
||||
private:
|
||||
// A privately used function to set the executable RAM space to something invalid.
|
||||
// For debugging usefulness it should be used to set the RAM to a host specific breakpoint instruction
|
||||
virtual void PoisonMemory() = 0;
|
||||
|
||||
protected:
|
||||
u8 *region;
|
||||
size_t region_size;
|
||||
|
||||
public:
|
||||
CodeBlock() : region(nullptr), region_size(0) {}
|
||||
virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
|
||||
|
||||
CodeBlock(const CodeBlock&) = delete;
|
||||
CodeBlock& operator=(const CodeBlock&) = delete;
|
||||
|
||||
// Call this before you generate any code.
|
||||
void AllocCodeSpace(int size)
|
||||
{
|
||||
region_size = size;
|
||||
region = (u8*)AllocateExecutableMemory(region_size);
|
||||
T::SetCodePtr(region);
|
||||
}
|
||||
|
||||
// Always clear code space with breakpoints, so that if someone accidentally executes
|
||||
// uninitialized, it just breaks into the debugger.
|
||||
void ClearCodeSpace()
|
||||
{
|
||||
PoisonMemory();
|
||||
ResetCodePtr();
|
||||
}
|
||||
|
||||
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
|
||||
void FreeCodeSpace()
|
||||
{
|
||||
#ifdef __SYMBIAN32__
|
||||
ResetExecutableMemory(region);
|
||||
#else
|
||||
FreeMemoryPages(region, region_size);
|
||||
#endif
|
||||
region = nullptr;
|
||||
region_size = 0;
|
||||
}
|
||||
|
||||
bool IsInSpace(const u8 *ptr)
|
||||
{
|
||||
return (ptr >= region) && (ptr < (region + region_size));
|
||||
}
|
||||
|
||||
// Cannot currently be undone. Will write protect the entire code region.
|
||||
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
|
||||
void WriteProtect()
|
||||
{
|
||||
WriteProtectMemory(region, region_size, true);
|
||||
}
|
||||
|
||||
void ResetCodePtr()
|
||||
{
|
||||
T::SetCodePtr(region);
|
||||
}
|
||||
|
||||
size_t GetSpaceLeft() const
|
||||
{
|
||||
return region_size - (T::GetCodePtr() - region);
|
||||
}
|
||||
|
||||
u8 *GetBasePtr() {
|
||||
return region;
|
||||
}
|
||||
|
||||
size_t GetOffset(const u8 *ptr) const {
|
||||
return ptr - region;
|
||||
}
|
||||
};
|
||||
39
src/common/iterator_util.h
Normal file
39
src/common/iterator_util.h
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2016 MerryMage
|
||||
* This software may be used and distributed according to the terms of the GNU
|
||||
* General Public License version 2 or any later version.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iterator>
#include <utility>
|
||||
|
||||
namespace Dynarmic {
|
||||
namespace Common {
|
||||
|
||||
namespace detail {

/// Range wrapper whose begin()/end() yield the wrapped range's reverse
/// iterators, so it can be used directly in a range-based for loop.
///
/// T is the type deduced by Reverse(): an lvalue reference when an lvalue was
/// passed (the adapter then refers to the caller's object), or a plain object
/// type when an rvalue was passed (the adapter then owns the moved-in object).
template<typename T>
struct ReverseAdapter {
    // Stored as T rather than T&: for an rvalue argument a reference member
    // would be left dangling as soon as the temporary is destroyed, which
    // happens before the body of a range-based for loop runs.
    T iterable;

    auto begin() {
        using namespace std;
        return rbegin(iterable);
    }

    auto end() {
        using namespace std;
        return rend(iterable);
    }
};

} // namespace detail

/// Returns an adapter that iterates `iterable` back to front.
/// Lvalues are referenced, rvalues are moved into the adapter.
template<typename T>
detail::ReverseAdapter<T> Reverse(T&& iterable) {
    return detail::ReverseAdapter<T>{std::forward<T>(iterable)};
}
|
||||
|
||||
} // namespace Common
|
||||
} // namespace Dynarmic
|
||||
|
|
@ -1,192 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/memory_util.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <psapi.h>
|
||||
#include "common/string_util.h"
|
||||
#else
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
|
||||
#include <unistd.h>
|
||||
#define PAGE_MASK (getpagesize() - 1)
|
||||
#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
|
||||
#endif
|
||||
|
||||
#ifndef _WIN32
// strerror_r() exists in two incompatible flavours: the XSI one returns an
// int and writes the message into the caller's buffer, the GNU one returns a
// char* that may point at an internal string and leave the buffer untouched.
// Overload resolution on the return type selects the matching adapter, and
// each adapter yields a pointer to the actual message.
static inline const char* StrerrorMessage(int /*status*/, const char* buffer)
{
    return buffer;
}

static inline const char* StrerrorMessage(const char* message, const char* /*buffer*/)
{
    return message;
}
#endif

// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.
//
// Returns a pointer to a per-thread buffer holding the text for the current
// GetLastError() (Windows) or errno (elsewhere) value. The buffer is
// overwritten by the next call on the same thread.
const char* GetLastErrorMsg()
{
    static const size_t buff_size = 255;

#ifdef _WIN32
    static thread_local char err_str[buff_size] = {};

    FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, nullptr, GetLastError(),
                   MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                   err_str, buff_size, nullptr);
#else
    static __thread char err_str[buff_size] = {};

    // Works with either strerror_r() flavour (see StrerrorMessage above).
    const char* message = StrerrorMessage(strerror_r(errno, err_str, buff_size), err_str);
    if (message != nullptr && message != err_str)
    {
        // GNU flavour handed back its own string: copy it so the caller
        // always receives the per-thread buffer, as before.
        strncpy(err_str, message, buff_size - 1);
        err_str[buff_size - 1] = '\0';
    }
#endif

    return err_str;
}
|
||||
|
||||
|
||||
// This is purposely not a full wrapper for virtualalloc/mmap, but it
|
||||
// provides exactly the primitive operations that Dolphin needs.
|
||||
|
||||
void* AllocateExecutableMemory(size_t size, bool low)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
|
||||
#else
|
||||
static char* map_hint = nullptr;
|
||||
#if defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
|
||||
// This OS has no flag to enforce allocation below the 4 GB boundary,
|
||||
// but if we hint that we want a low address it is very likely we will
|
||||
// get one.
|
||||
// An older version of this code used MAP_FIXED, but that has the side
|
||||
// effect of discarding already mapped pages that happen to be in the
|
||||
// requested virtual memory range (such as the emulated RAM, sometimes).
|
||||
if (low && (!map_hint))
|
||||
map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
|
||||
#endif
|
||||
void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
MAP_ANON | MAP_PRIVATE
|
||||
#if defined(ARCHITECTURE_X64) && defined(MAP_32BIT)
|
||||
| (low ? MAP_32BIT : 0)
|
||||
#endif
|
||||
, -1, 0);
|
||||
#endif /* defined(_WIN32) */
|
||||
|
||||
#ifdef _WIN32
|
||||
if (ptr == nullptr)
|
||||
{
|
||||
#else
|
||||
if (ptr == MAP_FAILED)
|
||||
{
|
||||
ptr = nullptr;
|
||||
#endif
|
||||
ASSERT_MSG(false, "Failed to allocate executable memory");
|
||||
}
|
||||
#if !defined(_WIN32) && defined(ARCHITECTURE_X64) && !defined(MAP_32BIT)
|
||||
else
|
||||
{
|
||||
if (low)
|
||||
{
|
||||
map_hint += size;
|
||||
map_hint = (char*)round_page(map_hint); /* round up to the next page */
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if EMU_ARCH_BITS == 64
|
||||
if ((u64)ptr >= 0x80000000 && low == true)
|
||||
ASSERT_MSG(false, "Executable memory ended up above 2GB!");
|
||||
#endif
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* AllocateMemoryPages(size_t size)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
void* ptr = VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
#else
|
||||
void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
|
||||
MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||
|
||||
if (ptr == MAP_FAILED)
|
||||
ptr = nullptr;
|
||||
#endif
|
||||
|
||||
if (ptr == nullptr)
|
||||
ASSERT_MSG(false, "Failed to allocate raw memory");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* AllocateAlignedMemory(size_t size,size_t alignment)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
void* ptr = _aligned_malloc(size,alignment);
|
||||
#else
|
||||
void* ptr = nullptr;
|
||||
#ifdef ANDROID
|
||||
ptr = memalign(alignment, size);
|
||||
#else
|
||||
if (posix_memalign(&ptr, alignment, size) != 0)
|
||||
ASSERT_MSG(false, "Failed to allocate aligned memory");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (ptr == nullptr)
|
||||
ASSERT_MSG(false, "Failed to allocate aligned memory");
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void FreeMemoryPages(void* ptr, size_t size)
|
||||
{
|
||||
if (ptr)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (!VirtualFree(ptr, 0, MEM_RELEASE))
|
||||
ASSERT_MSG(false, "FreeMemoryPages failed!\n%s", GetLastErrorMsg());
|
||||
#else
|
||||
munmap(ptr, size);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Releases memory obtained from AllocateAlignedMemory(). A null pointer is
// ignored.
void FreeAlignedMemory(void* ptr)
{
    if (ptr == nullptr)
        return;

#ifdef _WIN32
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}
|
||||
|
||||
void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD oldValue;
|
||||
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
|
||||
ASSERT_MSG(false, "WriteProtectMemory failed!\n%s", GetLastErrorMsg());
|
||||
#else
|
||||
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ);
|
||||
#endif
|
||||
}
|
||||
|
||||
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD oldValue;
|
||||
if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
|
||||
ASSERT_MSG(false, "UnWriteProtectMemory failed!\n%s", GetLastErrorMsg());
|
||||
#else
|
||||
mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Placeholder: no memory usage statistics are gathered, so the result is
// always an empty string.
std::string MemUsage()
{
    return std::string("");
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <string>
|
||||
|
||||
void* AllocateExecutableMemory(size_t size, bool low = true);
|
||||
void* AllocateMemoryPages(size_t size);
|
||||
void FreeMemoryPages(void* ptr, size_t size);
|
||||
void* AllocateAlignedMemory(size_t size,size_t alignment);
|
||||
void FreeAlignedMemory(void* ptr);
|
||||
void WriteProtectMemory(void* ptr, size_t size, bool executable = false);
|
||||
void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute = false);
|
||||
std::string MemUsage();
|
||||
|
||||
// Page size assumed by this code, in bytes. This is a fixed value and is not
// queried from the operating system.
inline int GetPageSize()
{
    const int page_bytes = 4096;
    return page_bytes;
}
|
||||
|
|
@ -1,363 +0,0 @@
|
|||
// Copyright (C) 2003 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "abi.h"
|
||||
#include "emitter.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// Shared code between Win64 and Unix64
|
||||
|
||||
// Works out the stack frame layout used by the push/pop helpers.
//   mask              registers to save (bits selected by ABI_ALL_GPRS are
//                     GPRs, bits selected by ABI_ALL_FPRS are XMM registers)
//   rsp_alignment     RSP modulo 16 at the point the frame is set up
//   needed_frame_size extra bytes the caller wants inside the frame
// Outputs:
//   *shadowp      shadow space size (0x20 on Windows, 0 elsewhere)
//   *subtractionp bytes to subtract from RSP after the GPR pushes
//   *xmm_offsetp  RSP-relative offset of the XMM save area after that
void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) {
#if defined(_WIN32)
    const size_t shadow_bytes = 0x20;
#else
    const size_t shadow_bytes = 0;
#endif

    const int gpr_count = (mask & ABI_ALL_GPRS).Count();
    const int xmm_count = (mask & ABI_ALL_FPRS).Count();

    // Every pushed GPR moves RSP down by 8 bytes.
    rsp_alignment -= gpr_count * 8;

    // If we have any XMMs to save, we must align the stack here.
    size_t frame_bytes = (xmm_count != 0) ? (rsp_alignment & 0xf) : 0;
    frame_bytes += 16 * xmm_count;
    const size_t xmm_area_end = frame_bytes;

    frame_bytes += needed_frame_size;
    frame_bytes += shadow_bytes;

    // Final alignment.
    rsp_alignment -= frame_bytes;
    frame_bytes += rsp_alignment & 0xf;

    *shadowp = shadow_bytes;
    *subtractionp = frame_bytes;
    *xmm_offsetp = frame_bytes - xmm_area_end;
}
|
||||
|
||||
// Emits code that saves the registers in `mask` and sets up the stack frame
// described by ABI_CalculateFrameSize(): GPRs are pushed in ascending bit
// order, RSP is lowered, then XMM registers are stored into the frame.
// Returns the shadow space size reported by ABI_CalculateFrameSize().
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
    size_t shadow_bytes, frame_bytes, xmm_slot;
    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes, &xmm_slot);

    const BitSet32 gprs = mask & ABI_ALL_GPRS;
    for (int reg : gprs) {
        PUSH((X64Reg)reg);
    }

    if (frame_bytes != 0) {
        // Use the 8-bit immediate form when the value is below 0x80.
        if (frame_bytes >= 0x80) {
            SUB(64, R(RSP), Imm32((u32)frame_bytes));
        } else {
            SUB(64, R(RSP), Imm8((u8)frame_bytes));
        }
    }

    const BitSet32 xmms = mask & ABI_ALL_FPRS;
    for (int bit : xmms) {
        // XMM registers occupy mask bits 16..31, hence the -16.
        MOVAPD(MDisp(RSP, (int)xmm_slot), (X64Reg)(bit - 16));
        xmm_slot += 16;
    }

    return shadow_bytes;
}
|
||||
|
||||
// Emits the inverse of ABI_PushRegistersAndAdjustStack() for the same
// arguments: XMM registers are reloaded from the frame, RSP is raised again,
// and GPRs are popped in descending order (the reverse of the push order).
void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) {
    size_t shadow_bytes, frame_bytes, xmm_slot;
    ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow_bytes, &frame_bytes, &xmm_slot);

    const BitSet32 xmms = mask & ABI_ALL_FPRS;
    for (int bit : xmms) {
        // XMM registers occupy mask bits 16..31, hence the -16.
        MOVAPD((X64Reg)(bit - 16), MDisp(RSP, (int)xmm_slot));
        xmm_slot += 16;
    }

    if (frame_bytes != 0) {
        // Use the 8-bit immediate form when the value is below 0x80.
        if (frame_bytes >= 0x80) {
            ADD(64, R(RSP), Imm32((u32)frame_bytes));
        } else {
            ADD(64, R(RSP), Imm8((u8)frame_bytes));
        }
    }

    int reg = 15;
    while (reg >= 0) {
        if (mask[reg]) {
            POP((X64Reg)reg);
        }
        --reg;
    }
}
|
||||
|
||||
// Common functions
|
||||
void XEmitter::ABI_CallFunction(const void *func) {
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32((u32)param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32((u32)param2));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(64, R(ABI_PARAM3), ImmPtr(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) {
|
||||
MOV(32, R(ABI_PARAM1), Imm32(param1));
|
||||
MOV(32, R(ABI_PARAM2), Imm32(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
MOV(64, R(ABI_PARAM4), ImmPtr(param4));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionP(const void *func, void *param1) {
|
||||
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a call to `func` with a pointer constant and one 32-bit operand.
// NOTE(review): the first parameter register is written before arg2 is read,
// so an arg2 that refers to ABI_PARAM1 would see the new value - confirm no
// caller passes such an operand.
void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) {
    MOV(64, R(ABI_PARAM1), ImmPtr(param1));
    // Skip the move when the operand already lives in the right register.
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with a pointer constant and two 32-bit operands.
// NOTE(review): parameter registers are written in order, so an operand that
// refers to a register written earlier in this sequence would see the new
// value - confirm no caller passes such operands.
void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) {
    MOV(64, R(ABI_PARAM1), ImmPtr(param1));
    // Skip each move when the operand already lives in the right register.
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    if (!arg3.IsSimpleReg(ABI_PARAM3))
        MOV(32, R(ABI_PARAM3), arg3);
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
||||
void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) {
|
||||
MOV(64, R(ABI_PARAM1), ImmPtr(param1));
|
||||
MOV(64, R(ABI_PARAM2), ImmPtr(param2));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(param3));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass a register as a parameter.
|
||||
void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) {
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R(reg1));
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass two registers as parameters.
|
||||
void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) {
|
||||
if (reg2 != ABI_PARAM1) {
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
} else {
|
||||
if (reg2 != ABI_PARAM2)
|
||||
MOV(64, R(ABI_PARAM2), R(reg2));
|
||||
if (reg1 != ABI_PARAM1)
|
||||
MOV(64, R(ABI_PARAM1), R(reg1));
|
||||
}
|
||||
u64 distance = u64(func) - (u64(code) + 5);
|
||||
if (distance >= 0x0000000080000000ULL
|
||||
&& distance < 0xFFFFFFFF80000000ULL) {
|
||||
// Far call
|
||||
MOV(64, R(RAX), ImmPtr(func));
|
||||
CALLptr(R(RAX));
|
||||
} else {
|
||||
CALL(func);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a call to `func` with one 32-bit operand and one 32-bit constant.
void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2)
{
    // Skip the move when the operand already lives in the right register.
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    MOV(32, R(ABI_PARAM2), Imm32(param2));
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with one 32-bit operand and two 32-bit constants.
void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3)
{
    // Skip the move when the operand already lives in the right register.
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    MOV(32, R(ABI_PARAM2), Imm32(param2));
    // NOTE(review): unlike the sibling helpers this loads the u32 constant
    // with a 64-bit move; kept as is - confirm whether that is intentional.
    MOV(64, R(ABI_PARAM3), Imm64(param3));
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with one 32-bit operand as its argument.
void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1)
{
    // Skip the move when the operand already lives in the right register.
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
||||
// Emits a call to `func` with two 32-bit operands as its arguments.
// NOTE(review): the first parameter register is written before arg2 is read,
// so an arg2 that refers to ABI_PARAM1 would see the new value - confirm no
// caller passes such an operand.
void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2)
{
    // Skip each move when the operand already lives in the right register.
    if (!arg1.IsSimpleReg(ABI_PARAM1))
        MOV(32, R(ABI_PARAM1), arg1);
    if (!arg2.IsSimpleReg(ABI_PARAM2))
        MOV(32, R(ABI_PARAM2), arg2);
    // Near/far call selection is shared with ABI_CallFunction(); it reads the
    // code pointer after the argument setup above has been emitted.
    ABI_CallFunction(func);
}
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_set.h"
|
||||
#include "emitter.h"
|
||||
|
||||
// x64 ABIs, and helpers to help follow them when JIT-ing code.
|
||||
// All conventions return values in EAX (+ possibly EDX).
|
||||
|
||||
// Windows 64-bit
|
||||
// * 4-reg "fastcall" variant, very new-skool stack handling
|
||||
// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_
|
||||
// * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space.
|
||||
// Scratch: RAX RCX RDX R8 R9 R10 R11
|
||||
// Callee-save: RBX RSI RDI RBP R12 R13 R14 R15
|
||||
// Parameters: RCX RDX R8 R9, further MOV-ed
|
||||
|
||||
// Linux 64-bit
|
||||
// * 6-reg "fastcall" variant, old skool stack handling (parameters are pushed)
|
||||
// Scratch: RAX RCX RDX RSI RDI R8 R9 R10 R11
|
||||
// Callee-save: RBX RBP R12 R13 R14 R15
|
||||
// Parameters: RDI RSI RDX RCX R8 R9
|
||||
|
||||
// Register-set masks. The low 16 bits (0x0000ffff) are the general-purpose
// registers; the high 16 bits (0xffff0000) are the XMM registers, which is why
// the XMM entries further down are written as XMMn+16.
#define ABI_ALL_FPRS BitSet32(0xffff0000)
|
||||
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
|
||||
|
||||
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
|
||||
|
||||
// Integer parameter registers, in argument order (only four on Windows).
#define ABI_PARAM1 RCX
|
||||
#define ABI_PARAM2 RDX
|
||||
#define ABI_PARAM3 R8
|
||||
#define ABI_PARAM4 R9
|
||||
|
||||
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
|
||||
// Only XMM0-XMM5 are listed as caller-saved here, so XMM6-XMM15 end up in
// ABI_ALL_CALLEE_SAVED through the complement below.
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \
|
||||
XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 })
|
||||
#else //64-bit Unix / OS X
|
||||
|
||||
// Integer parameter registers, in argument order (six on Unix-like systems).
#define ABI_PARAM1 RDI
|
||||
#define ABI_PARAM2 RSI
|
||||
#define ABI_PARAM3 RDX
|
||||
#define ABI_PARAM4 RCX
|
||||
#define ABI_PARAM5 R8
|
||||
#define ABI_PARAM6 R9
|
||||
|
||||
// TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably
|
||||
// don't actually clobber them.
|
||||
// Here every XMM register (ABI_ALL_FPRS) is treated as caller-saved.
#define ABI_ALL_CALLER_SAVED \
|
||||
(BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \
|
||||
ABI_ALL_FPRS)
|
||||
#endif // WIN32
|
||||
|
||||
// Everything that is not caller-saved.
// NOTE(review): this is a plain bitwise complement, so it also contains every
// register index that is simply absent from ABI_ALL_CALLER_SAVED (e.g. the
// stack pointer, if it has a bit in this set) - confirm users mask as needed.
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
|
||||
|
||||
// Register that carries a function's return value.
#define ABI_RETURN RAX
|
||||
|
|
@ -1,197 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include "cpu_detect.h"
|
||||
|
||||
namespace Common {
|
||||
|
||||
#ifndef _MSC_VER
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/types.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#endif
|
||||
|
||||
// Executes CPUID with EAX = function_id and ECX = subfunction_id and stores
// the resulting EAX, EBX, ECX and EDX in info[0], info[1], info[2] and info[3].
static inline void __cpuidex(int info[4], int function_id, int subfunction_id) {
#ifdef __FreeBSD__
    // Despite the name, this is just do_cpuid() with ECX as second input.
    cpuid_count(static_cast<u_int>(function_id), static_cast<u_int>(subfunction_id),
                reinterpret_cast<u_int*>(info));
#else
    int eax_out;
    int ebx_out;
    int ecx_out;
    int edx_out;
    __asm__(
        "cpuid"
        : "=a" (eax_out), "=b" (ebx_out), "=c" (ecx_out), "=d" (edx_out)
        : "a" (function_id), "c" (subfunction_id)
    );
    info[0] = eax_out;
    info[1] = ebx_out;
    info[2] = ecx_out;
    info[3] = edx_out;
#endif
}
|
||||
|
||||
static inline void __cpuid(int info[4], int function_id) {
|
||||
return __cpuidex(info, function_id, 0);
|
||||
}
|
||||
|
||||
#define _XCR_XFEATURE_ENABLED_MASK 0
|
||||
static u64 _xgetbv(u32 index) {
|
||||
u32 eax, edx;
|
||||
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
|
||||
return ((u64)edx << 32) | eax;
|
||||
}
|
||||
|
||||
#endif // ifndef _MSC_VER
|
||||
|
||||
// Detects the various CPU features
|
||||
static CPUCaps Detect() {
|
||||
CPUCaps caps = {};
|
||||
|
||||
caps.num_cores = std::thread::hardware_concurrency();
|
||||
|
||||
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
|
||||
// Citra at all anyway
|
||||
|
||||
int cpu_id[4];
|
||||
memset(caps.brand_string, 0, sizeof(caps.brand_string));
|
||||
|
||||
// Detect CPU's CPUID capabilities and grab CPU string
|
||||
__cpuid(cpu_id, 0x00000000);
|
||||
u32 max_std_fn = cpu_id[0]; // EAX
|
||||
|
||||
std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
|
||||
std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
|
||||
|
||||
__cpuid(cpu_id, 0x80000000);
|
||||
|
||||
u32 max_ex_fn = cpu_id[0];
|
||||
if (!strcmp(caps.brand_string, "GenuineIntel"))
|
||||
caps.vendor = CPUVendor::INTEL;
|
||||
else if (!strcmp(caps.brand_string, "AuthenticAMD"))
|
||||
caps.vendor = CPUVendor::AMD;
|
||||
else
|
||||
caps.vendor = CPUVendor::OTHER;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
|
||||
// Set reasonable default brand string even if brand string not available
|
||||
strncpy(caps.cpu_string, caps.brand_string, sizeof(caps.cpu_string));
|
||||
caps.cpu_string[sizeof(caps.cpu_string) - 1] = '\0';
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
// Detect family and other miscellaneous features
|
||||
if (max_std_fn >= 1) {
|
||||
__cpuid(cpu_id, 0x00000001);
|
||||
|
||||
if ((cpu_id[3] >> 25) & 1) caps.sse = true;
|
||||
if ((cpu_id[3] >> 26) & 1) caps.sse2 = true;
|
||||
if ((cpu_id[2]) & 1) caps.sse3 = true;
|
||||
if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true;
|
||||
if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true;
|
||||
if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true;
|
||||
if ((cpu_id[2] >> 22) & 1) caps.movbe = true;
|
||||
if ((cpu_id[2] >> 25) & 1) caps.aes = true;
|
||||
|
||||
if ((cpu_id[3] >> 24) & 1) {
|
||||
caps.fxsave_fxrstor = true;
|
||||
}
|
||||
|
||||
// AVX support requires 3 separate checks:
|
||||
// - Is the AVX bit set in CPUID?
|
||||
// - Is the XSAVE bit set in CPUID?
|
||||
// - XGETBV result has the XCR bit set.
|
||||
if (((cpu_id[2] >> 28) & 1) && ((cpu_id[2] >> 27) & 1)) {
|
||||
if ((_xgetbv(_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6) {
|
||||
caps.avx = true;
|
||||
if ((cpu_id[2] >> 12) & 1)
|
||||
caps.fma = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_std_fn >= 7) {
|
||||
__cpuidex(cpu_id, 0x00000007, 0x00000000);
|
||||
// Can't enable AVX2 unless the XSAVE/XGETBV checks above passed
|
||||
if ((cpu_id[1] >> 5) & 1)
|
||||
caps.avx2 = caps.avx;
|
||||
if ((cpu_id[1] >> 3) & 1)
|
||||
caps.bmi1 = true;
|
||||
if ((cpu_id[1] >> 8) & 1)
|
||||
caps.bmi2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
caps.flush_to_zero = caps.sse;
|
||||
|
||||
if (max_ex_fn >= 0x80000004) {
|
||||
// Extract CPU model string
|
||||
__cpuid(cpu_id, 0x80000002);
|
||||
std::memcpy(caps.cpu_string, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000003);
|
||||
std::memcpy(caps.cpu_string + 16, cpu_id, sizeof(cpu_id));
|
||||
__cpuid(cpu_id, 0x80000004);
|
||||
std::memcpy(caps.cpu_string + 32, cpu_id, sizeof(cpu_id));
|
||||
}
|
||||
|
||||
if (max_ex_fn >= 0x80000001) {
|
||||
// Check for more features
|
||||
__cpuid(cpu_id, 0x80000001);
|
||||
if (cpu_id[2] & 1) caps.lahf_sahf_64 = true;
|
||||
if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true;
|
||||
if ((cpu_id[2] >> 16) & 1) caps.fma4 = true;
|
||||
if ((cpu_id[3] >> 29) & 1) caps.long_mode = true;
|
||||
}
|
||||
|
||||
return caps;
|
||||
}
|
||||
|
||||
// Returns the detected host CPU capabilities. Detect() initialises a
// function-local static, and that same object is returned on every call.
const CPUCaps& GetCPUCaps() {
    static const CPUCaps detected_caps = Detect();
    return detected_caps;
}
|
||||
|
||||
std::string GetCPUCapsString() {
|
||||
auto caps = GetCPUCaps();
|
||||
|
||||
std::string sum(caps.cpu_string);
|
||||
sum += " (";
|
||||
sum += caps.brand_string;
|
||||
sum += ")";
|
||||
|
||||
if (caps.sse) sum += ", SSE";
|
||||
if (caps.sse2) {
|
||||
sum += ", SSE2";
|
||||
if (!caps.flush_to_zero) sum += " (without DAZ)";
|
||||
}
|
||||
|
||||
if (caps.sse3) sum += ", SSE3";
|
||||
if (caps.ssse3) sum += ", SSSE3";
|
||||
if (caps.sse4_1) sum += ", SSE4.1";
|
||||
if (caps.sse4_2) sum += ", SSE4.2";
|
||||
if (caps.avx) sum += ", AVX";
|
||||
if (caps.avx2) sum += ", AVX2";
|
||||
if (caps.bmi1) sum += ", BMI1";
|
||||
if (caps.bmi2) sum += ", BMI2";
|
||||
if (caps.fma) sum += ", FMA";
|
||||
if (caps.aes) sum += ", AES";
|
||||
if (caps.movbe) sum += ", MOVBE";
|
||||
if (caps.long_mode) sum += ", 64-bit support";
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
} // namespace Common
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
// Copyright 2013 Dolphin Emulator Project / 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Common {
|
||||
|
||||
/// Vendor of the host x86/x64 CPU, as classified by this module.
enum class CPUVendor {
    INTEL, ///< Vendor id string is "GenuineIntel"
    AMD,   ///< Vendor id string is "AuthenticAMD"
    OTHER, ///< Any other vendor id string
};
|
||||
|
||||
/// x86/x64 CPU capabilities that may be detected by this module
|
||||
struct CPUCaps {
|
||||
CPUVendor vendor;
|
||||
char cpu_string[0x21];
|
||||
char brand_string[0x41];
|
||||
int num_cores;
|
||||
bool sse;
|
||||
bool sse2;
|
||||
bool sse3;
|
||||
bool ssse3;
|
||||
bool sse4_1;
|
||||
bool sse4_2;
|
||||
bool lzcnt;
|
||||
bool avx;
|
||||
bool avx2;
|
||||
bool bmi1;
|
||||
bool bmi2;
|
||||
bool fma;
|
||||
bool fma4;
|
||||
bool aes;
|
||||
|
||||
// Support for the FXSAVE and FXRSTOR instructions
|
||||
bool fxsave_fxrstor;
|
||||
|
||||
bool movbe;
|
||||
|
||||
// This flag indicates that the hardware supports some mode in which denormal inputs and outputs
|
||||
// are automatically set to (signed) zero.
|
||||
bool flush_to_zero;
|
||||
|
||||
// Support for LAHF and SAHF instructions in 64-bit mode
|
||||
bool lahf_sahf_64;
|
||||
|
||||
bool long_mode;
|
||||
};
|
||||
|
||||
/**
 * Gets the supported capabilities of the host CPU.
 *
 * The returned reference refers to a single object owned by the
 * implementation; callers can bind it by const reference instead of copying.
 *
 * @return Reference to a CPUCaps struct with the detected host CPU capabilities
 */
|
||||
const CPUCaps& GetCPUCaps();
|
||||
|
||||
/**
 * Gets a string summary of the name and supported capabilities of the host CPU.
 *
 * The summary starts with the CPU string followed by the brand string in
 * parentheses, then a comma-separated list of the detected features.
 *
 * @return String summary
 */
|
||||
std::string GetCPUCapsString();
|
||||
|
||||
} // namespace Common
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue