mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-01-07 06:58:15 +01:00
Squashed 'externals/xbyak/' changes from 671fc805..4a6fac8a
4a6fac8a update version to 5.77 801cf3fd cosmetic change of getNumCores d397e824 fix number of cores that share LLC cache a669e092 support non-intel-cpu visual studio af5f422e Merge branch 'fenghaitao-guard_x86' into develop 9b98dc17 Guard x86 specific codes with "#if defined(__i386__) || defined(__x86_64__)" dd4173e1 move some member variables input private f72646a7 update version 4612528f format change 4b95e862 Merge branch 'shelleygoel-master' 4c262fa6 add functionality to get num of cores using x2APIC ID bc70e7e1 recover Xbyak::CastTo d09a230f unlink Label when LabelManager is destroyed 973e8597 update version afdb9fe9 Xbyak::CastTo is removed b011aca4 add RegRip +/- int acae93cd increase max temp regs for StackFrame ea4e3562 util::StackFrame uses push/pop instead of mov 42462ef9 use evex encoding for vpslld/vpslldq/vpsraw/...(reg, mem, imm); da9117a9 update version of readme.md d35f4fb7 fix the encoding of vinsertps for disp8N 1de435ed bf uses Label class 613922bd add Label L() for convenience 43e15583 fix typo 93579ee6 add protect-re.cpp 60004b5c fix url of protect-re.cpp 348b2709 fix typo of doc f34f6ed5 update manual 232110be update test 82b78bf0 add setProtectMode dd8b290f put warning message if pageSize != 4096 64775ca2 a little refactoring 7c3e7b85 fix wrong VSIB encoding with idx >= 16 git-subtree-dir: externals/xbyak git-subtree-split: 4a6fac8ade404f667b94170f713367fe7da2a852
This commit is contained in:
parent
dbb1f8cf37
commit
080b4b3aff
17 changed files with 994 additions and 489 deletions
108
xbyak/xbyak.h
108
xbyak/xbyak.h
|
|
@ -40,6 +40,8 @@
|
|||
// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
|
||||
#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
|
||||
((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
|
||||
|
|
@ -49,16 +51,22 @@
|
|||
libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
|
||||
*/
|
||||
#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
|
||||
#include <tr1/unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <tr1/unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
|
||||
#include <unordered_set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
|
||||
#include <unordered_map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
|
||||
|
||||
#else
|
||||
#include <set>
|
||||
#define XBYAK_STD_UNORDERED_SET std::set
|
||||
#include <map>
|
||||
#define XBYAK_STD_UNORDERED_MAP std::map
|
||||
#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
|
||||
|
|
@ -105,7 +113,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5670 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x5770 /* 0xABCD = A.BC(D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
|
@ -178,7 +186,8 @@ enum {
|
|||
ERR_INVALID_ZERO,
|
||||
ERR_INVALID_RIP_IN_AUTO_GROW,
|
||||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_INTERNAL
|
||||
ERR_INTERNAL,
|
||||
ERR_X2APIC_IS_NOT_SUPPORTED
|
||||
};
|
||||
|
||||
class Error : public std::exception {
|
||||
|
|
@ -240,6 +249,7 @@ public:
|
|||
"invalid rip in AutoGrow",
|
||||
"invalid mib address",
|
||||
"internal error",
|
||||
"x2APIC is not supported"
|
||||
};
|
||||
assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
|
||||
return errTbl[err_];
|
||||
|
|
@ -617,6 +627,12 @@ struct RegRip {
|
|||
const Label* label_;
|
||||
bool isAddr_;
|
||||
explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
|
||||
friend const RegRip operator+(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator-(const RegRip& r, int disp) {
|
||||
return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
|
||||
}
|
||||
friend const RegRip operator+(const RegRip& r, sint64 disp) {
|
||||
return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
|
||||
}
|
||||
|
|
@ -786,6 +802,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
|
|||
|
||||
// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
|
||||
void *const AutoGrow = (void*)1; //-V566
|
||||
void *const DontSetProtectRWE = (void*)2; //-V566
|
||||
|
||||
class CodeArray {
|
||||
enum Type {
|
||||
|
|
@ -825,6 +842,7 @@ protected:
|
|||
size_t size_;
|
||||
bool isCalledCalcJmpAddress_;
|
||||
|
||||
bool useProtect() const { return alloc_->useProtect(); }
|
||||
/*
|
||||
allocate new memory and copy old data to the new area
|
||||
*/
|
||||
|
|
@ -848,7 +866,6 @@ protected:
|
|||
uint64 disp = i->getVal(top_);
|
||||
rewrite(i->codeOffset, disp, i->jmpSize);
|
||||
}
|
||||
if (alloc_->useProtect() && !protect(top_, size_, PROTECT_RWE)) throw Error(ERR_CANT_PROTECT);
|
||||
isCalledCalcJmpAddress_ = true;
|
||||
}
|
||||
public:
|
||||
|
|
@ -858,7 +875,7 @@ public:
|
|||
PROTECT_RE = 2 // read/exec
|
||||
};
|
||||
explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
|
||||
: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
|
||||
, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
|
||||
, maxSize_(maxSize)
|
||||
, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
|
||||
|
|
@ -866,7 +883,7 @@ public:
|
|||
, isCalledCalcJmpAddress_(false)
|
||||
{
|
||||
if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
|
||||
if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, PROTECT_RWE)) {
|
||||
if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
|
||||
alloc_->free(top_);
|
||||
throw Error(ERR_CANT_PROTECT);
|
||||
}
|
||||
|
|
@ -874,10 +891,19 @@ public:
|
|||
virtual ~CodeArray()
|
||||
{
|
||||
if (isAllocType()) {
|
||||
if (alloc_->useProtect()) protect(top_, maxSize_, PROTECT_RW);
|
||||
if (useProtect()) setProtectModeRW(false);
|
||||
alloc_->free(top_);
|
||||
}
|
||||
}
|
||||
/*
	change the protection of the whole code buffer [top_, top_ + maxSize_)
	@param mode protection to apply (one of the ProtectMode values)
	@param throwException if true, a failure raises Error(ERR_CANT_PROTECT)
	@return true on success; false on failure when throwException is false
*/
bool setProtectMode(ProtectMode mode, bool throwException = true)
{
	if (!protect(top_, maxSize_, mode)) {
		// report the failure in the way the caller asked for
		if (throwException) throw Error(ERR_CANT_PROTECT);
		return false;
	}
	return true;
}
|
||||
bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
|
||||
bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
|
||||
void resetSize()
|
||||
{
|
||||
size_ = 0;
|
||||
|
|
@ -909,10 +935,10 @@ public:
|
|||
void dq(uint64 code) { db(code, 8); }
|
||||
const uint8 *getCode() const { return top_; }
|
||||
template<class F>
|
||||
const F getCode() const { return CastTo<F>(top_); }
|
||||
const F getCode() const { return reinterpret_cast<F>(top_); }
|
||||
const uint8 *getCurr() const { return &top_[size_]; }
|
||||
template<class F>
|
||||
const F getCurr() const { return CastTo<F>(&top_[size_]); }
|
||||
const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
|
||||
size_t getSize() const { return size_; }
|
||||
void setSize(size_t size)
|
||||
{
|
||||
|
|
@ -995,6 +1021,9 @@ public:
|
|||
size_t pageSize = sysconf(_SC_PAGESIZE);
|
||||
size_t iaddr = reinterpret_cast<size_t>(addr);
|
||||
size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
|
||||
#ifndef NDEBUG
|
||||
if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
|
||||
#endif
|
||||
return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
|
||||
#else
|
||||
return true;
|
||||
|
|
@ -1115,6 +1144,7 @@ public:
|
|||
Label(const Label& rhs);
|
||||
Label& operator=(const Label& rhs);
|
||||
~Label();
|
||||
void clear() { mgr = 0; id = 0; }
|
||||
int getId() const { return id; }
|
||||
const uint8 *getAddress() const;
|
||||
|
||||
|
|
@ -1153,6 +1183,7 @@ class LabelManager {
|
|||
};
|
||||
typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
|
||||
typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
|
||||
typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
|
||||
|
||||
CodeArray *base_;
|
||||
// global : stateList_.front(), local : stateList_.back()
|
||||
|
|
@ -1160,6 +1191,7 @@ class LabelManager {
|
|||
mutable int labelId_;
|
||||
ClabelDefList clabelDefList_;
|
||||
ClabelUndefList clabelUndefList_;
|
||||
LabelPtrList labelPtrList_;
|
||||
|
||||
int getId(const Label& label) const
|
||||
{
|
||||
|
|
@ -1208,9 +1240,14 @@ class LabelManager {
|
|||
return true;
|
||||
}
|
||||
friend class Label;
|
||||
void incRefCount(int id) { clabelDefList_[id].refCount++; }
|
||||
void decRefCount(int id)
|
||||
void incRefCount(int id, Label *label)
|
||||
{
|
||||
clabelDefList_[id].refCount++;
|
||||
labelPtrList_.insert(label);
|
||||
}
|
||||
void decRefCount(int id, Label *label)
|
||||
{
|
||||
labelPtrList_.erase(label);
|
||||
ClabelDefList::iterator i = clabelDefList_.find(id);
|
||||
if (i == clabelDefList_.end()) return;
|
||||
if (i->second.refCount == 1) {
|
||||
|
|
@ -1229,11 +1266,23 @@ class LabelManager {
|
|||
#endif
|
||||
return !list.empty();
|
||||
}
|
||||
// detach all labels linked to LabelManager
|
||||
void resetLabelPtrList()
|
||||
{
|
||||
for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
|
||||
(*i)->clear();
|
||||
}
|
||||
labelPtrList_.clear();
|
||||
}
|
||||
public:
|
||||
LabelManager()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
~LabelManager()
|
||||
{
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void reset()
|
||||
{
|
||||
base_ = 0;
|
||||
|
|
@ -1243,6 +1292,7 @@ public:
|
|||
stateList_.push_back(SlabelState());
|
||||
clabelDefList_.clear();
|
||||
clabelUndefList_.clear();
|
||||
resetLabelPtrList();
|
||||
}
|
||||
void enterLocal()
|
||||
{
|
||||
|
|
@ -1275,10 +1325,11 @@ public:
|
|||
SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
|
||||
define_inner(st.defList, st.undefList, label, base_->getSize());
|
||||
}
|
||||
void defineClabel(const Label& label)
|
||||
// define `label` at the current size of the code buffer, bind it to this
// manager and register its address in labelPtrList_
void defineClabel(Label& label)
{
	const int labelId = getId(label);
	const size_t offset = base_->getSize();
	define_inner(clabelDefList_, clabelUndefList_, labelId, offset);
	label.mgr = this;
	labelPtrList_.insert(&label);
}
|
||||
void assign(Label& dst, const Label& src)
|
||||
{
|
||||
|
|
@ -1286,6 +1337,7 @@ public:
|
|||
if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
|
||||
define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
|
||||
dst.mgr = this;
|
||||
labelPtrList_.insert(&dst);
|
||||
}
|
||||
bool getOffset(size_t *offset, std::string& label) const
|
||||
{
|
||||
|
|
@ -1333,19 +1385,19 @@ inline Label::Label(const Label& rhs)
|
|||
{
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
}
|
||||
inline Label& Label::operator=(const Label& rhs)
|
||||
{
|
||||
if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
|
||||
id = rhs.id;
|
||||
mgr = rhs.mgr;
|
||||
if (mgr) mgr->incRefCount(id);
|
||||
if (mgr) mgr->incRefCount(id, this);
|
||||
return *this;
|
||||
}
|
||||
inline Label::~Label()
|
||||
{
|
||||
if (id && mgr) mgr->decRefCount(id);
|
||||
if (id && mgr) mgr->decRefCount(id, this);
|
||||
}
|
||||
inline const uint8* Label::getAddress() const
|
||||
{
|
||||
|
|
@ -1463,6 +1515,7 @@ private:
|
|||
T_B64 = 1 << 27, // m64bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_XXX
|
||||
};
|
||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||
|
|
@ -1500,7 +1553,7 @@ private:
|
|||
if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
|
||||
return v;
|
||||
}
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
|
||||
int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
|
||||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
|
||||
int w = (type & T_EW1) ? 1 : 0;
|
||||
|
|
@ -1543,7 +1596,7 @@ private:
|
|||
}
|
||||
}
|
||||
}
|
||||
bool Vp = !(v ? v->isExtIdx2() : 0);
|
||||
bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
|
||||
bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
|
||||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||
db(0x62);
|
||||
|
|
@ -1935,10 +1988,11 @@ private:
|
|||
const Address& addr = op2.getAddress();
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
const Reg& base = regExp.getBase();
|
||||
const Reg& index = regExp.getIndex();
|
||||
if (BIT == 64 && addr.is32bit()) db(0x67);
|
||||
int disp8N = 0;
|
||||
bool x = regExp.getIndex().isExtIdx();
|
||||
if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
bool x = index.isExtIdx();
|
||||
if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
|
||||
int aaa = addr.getOpmaskIdx();
|
||||
if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
|
||||
bool b = false;
|
||||
|
|
@ -1946,8 +2000,8 @@ private:
|
|||
if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
|
||||
b = true;
|
||||
}
|
||||
int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
|
||||
int VL = regExp.isVsib() ? index.getBit() : 0;
|
||||
disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
|
||||
} else {
|
||||
vex(r, base, p1, type, code, x);
|
||||
}
|
||||
|
|
@ -2147,7 +2201,8 @@ public:
|
|||
const Segment es, cs, ss, ds, fs, gs;
|
||||
#endif
|
||||
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
||||
void L(const Label& label) { labelMgr_.defineClabel(label); }
|
||||
void L(Label& label) { labelMgr_.defineClabel(label); }
|
||||
Label L() { Label label; L(label); return label; }
|
||||
void inLocalLabel() { labelMgr_.enterLocal(); }
|
||||
void outLocalLabel() { labelMgr_.leaveLocal(); }
|
||||
/*
|
||||
|
|
@ -2178,7 +2233,7 @@ public:
|
|||
// call(function pointer)
|
||||
#ifdef XBYAK_VARIADIC_TEMPLATE
|
||||
template<class Ret, class... Params>
|
||||
void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
|
||||
void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
|
||||
#endif
|
||||
void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
|
||||
|
||||
|
|
@ -2436,11 +2491,16 @@ public:
|
|||
MUST call ready() to complete generating code if you use AutoGrow mode.
|
||||
It is not necessary for the other mode if hasUndefinedLabel() is true.
|
||||
*/
|
||||
void ready()
|
||||
void ready(ProtectMode mode = PROTECT_RWE)
|
||||
{
|
||||
if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
|
||||
if (isAutoGrow()) calcJmpAddress();
|
||||
if (isAutoGrow()) {
|
||||
calcJmpAddress();
|
||||
if (useProtect()) setProtectMode(mode);
|
||||
}
|
||||
}
|
||||
// set read/exec
|
||||
void readyRE() { return ready(PROTECT_RE); }
|
||||
#ifdef XBYAK_TEST
|
||||
void dump(bool doClear = true)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.67"; }
|
||||
const char *getVersionString() const { return "5.77"; }
|
||||
void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
|
@ -1023,7 +1023,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
|
|||
void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
|
||||
void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
|
||||
void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
|
||||
void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
|
||||
void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
|
||||
void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
|
||||
|
|
@ -1206,28 +1206,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
|
|||
void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
|
||||
void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
|
||||
void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
|
||||
void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
|
||||
void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
|
||||
void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
|
||||
void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
|
||||
void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
|
||||
void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
|
||||
void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
|
||||
void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
|
||||
void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
|
||||
void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
|
||||
void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
|
||||
|
|
|
|||
|
|
@ -9,6 +9,11 @@
|
|||
*/
|
||||
#include "xbyak.h"
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define XBYAK_INTEL_CPU_SPECIFIC
|
||||
#endif
|
||||
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
|
|
@ -47,14 +52,30 @@
|
|||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Xbyak { namespace util {
|
||||
|
||||
typedef enum {
|
||||
SmtLevel = 1,
|
||||
CoreLevel = 2
|
||||
} IntelCpuTopologyLevel;
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
*/
|
||||
class Cpu {
|
||||
uint64 type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||
unsigned int dataCacheLevels_;
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
|
|
@ -65,7 +86,7 @@ class Cpu {
|
|||
}
|
||||
void setFamily()
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
getCpuid(1, data);
|
||||
stepping = data[0] & mask(4);
|
||||
model = (data[0] >> 4) & mask(4);
|
||||
|
|
@ -88,6 +109,39 @@ class Cpu {
|
|||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
/*
|
||||
if leaf 11 exists(x2APIC is supported),
|
||||
we use it to get the number of smt cores and cores on socket
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
numCores_[level - 1] = extractBit(data[1], 0, 15);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
Failed to determine num of cores without x2APIC support.
|
||||
TODO: USE initial APIC ID to determine ncores.
|
||||
*/
|
||||
numCores_[SmtLevel - 1] = 0;
|
||||
numCores_[CoreLevel - 1] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
|
|
@ -96,21 +150,12 @@ class Cpu {
|
|||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int n_cores = 0;
|
||||
unsigned int data[4];
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4] = {};
|
||||
|
||||
/*
|
||||
if leaf 11 exists, we use it to get the number of smt cores and cores on socket
|
||||
If x2APIC is supported, these are the only correct numbers.
|
||||
|
||||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
getCpuidEx(0x0, 0, data);
|
||||
if (data[0] >= 0xB) {
|
||||
getCpuidEx(0xB, 0, data); // CPUID for SMT Level
|
||||
smt_width = data[1] & 0x7FFF;
|
||||
getCpuidEx(0xB, 1, data); // CPUID for CORE Level
|
||||
n_cores = data[1] & 0x7FFF;
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
logical_cores = numCores_[1];
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -118,29 +163,29 @@ class Cpu {
|
|||
the first level of data cache is not shared (which is the
|
||||
case for every existing architecture) and use this to
|
||||
determine the SMT width for arch not supporting leaf 11.
|
||||
when leaf 4 reports a number of core less than n_cores
|
||||
when leaf 4 reports a number of core less than numCores_
|
||||
on socket reported by leaf 11, then it is a correct number
|
||||
of cores not an upperbound.
|
||||
*/
|
||||
for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
|
||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||
getCpuidEx(0x4, i, data);
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (n_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
assert(nb_logical_cores != 0);
|
||||
data_cache_size[data_cache_levels] =
|
||||
assert(actual_logical_cores != 0);
|
||||
dataCacheSize_[dataCacheLevels_] =
|
||||
(extractBit(data[1], 22, 31) + 1)
|
||||
* (extractBit(data[1], 12, 21) + 1)
|
||||
* (extractBit(data[1], 0, 11) + 1)
|
||||
* (data[2] + 1);
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
|
||||
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
|
||||
assert(smt_width != 0);
|
||||
cores_sharing_data_cache[data_cache_levels] = (std::max)(nb_logical_cores / smt_width, 1u);
|
||||
data_cache_levels++;
|
||||
coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
|
||||
dataCacheLevels_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -154,22 +199,25 @@ public:
|
|||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
// may I move these members into private?
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int data_cache_size[maxNumberCacheLevels];
|
||||
unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
|
||||
unsigned int data_cache_levels;
|
||||
/*
	get the core count recorded from CPUID leaf 0xB for the given topology level
	SmtLevel  : the value stored for the SMT level
	CoreLevel : the value stored for the core level divided by the SMT-level value
	@param level SmtLevel or CoreLevel
	@return the count, or 0 for CoreLevel when the SMT-level value is 0
		(leaf 0xB can be zeroed-out by a hypervisor); this avoids a division by zero
	@note throws Error(ERR_X2APIC_IS_NOT_SUPPORTED) if leaf 0xB is not available
		or level is neither SmtLevel nor CoreLevel
*/
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
	if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	switch (level) {
	case SmtLevel:
		return numCores_[SmtLevel - 1];
	case CoreLevel: {
		const unsigned int smtWidth = numCores_[SmtLevel - 1];
		// 0 means "unknown"; do not divide by it
		if (smtWidth == 0) return 0;
		return numCores_[CoreLevel - 1] / smtWidth;
	}
	default:
		throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
	}
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return data_cache_levels; }
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return cores_sharing_data_cache[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
{
|
||||
if (i >= data_cache_levels) throw Error(ERR_BAD_PARAMETER);
|
||||
return data_cache_size[i];
|
||||
if (i >= dataCacheLevels_) throw Error(ERR_BAD_PARAMETER);
|
||||
return dataCacheSize_[i];
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -177,30 +225,45 @@ public:
|
|||
*/
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
|
||||
#else
|
||||
#else
|
||||
__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
|
||||
#endif
|
||||
#else
|
||||
(void)eaxIn;
|
||||
(void)ecxIn;
|
||||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline uint64 getXfeature()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
// xgetbv is not supported on gcc 4.2
|
||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
typedef uint64 Type;
|
||||
|
|
@ -271,9 +334,13 @@ public:
|
|||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
, data_cache_levels(0)
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
{
|
||||
unsigned int data[4];
|
||||
unsigned int data[4] = {};
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
|
|
@ -363,6 +430,7 @@ public:
|
|||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
}
|
||||
setFamily();
|
||||
setNumCores();
|
||||
setCacheHierarchy();
|
||||
}
|
||||
void putFamily() const
|
||||
|
|
@ -381,12 +449,17 @@ class Clock {
|
|||
public:
|
||||
static inline uint64 getRdtsc()
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64)edx << 32) | eax;
|
||||
#endif
|
||||
#else
|
||||
// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
Clock()
|
||||
|
|
@ -416,7 +489,7 @@ const int UseRCX = 1 << 6;
|
|||
const int UseRDX = 1 << 7;
|
||||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 10;
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
|
|
@ -476,7 +549,7 @@ public:
|
|||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
{
|
||||
if (n >= n_) {
|
||||
fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
|
||||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
throw Error(ERR_BAD_PARAMETER);
|
||||
}
|
||||
return *tbl_[n];
|
||||
|
|
@ -518,6 +591,7 @@ class StackFrame {
|
|||
static const int rcxPos = 3;
|
||||
static const int rdxPos = 2;
|
||||
#endif
|
||||
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
|
||||
Xbyak::CodeGenerator *code_;
|
||||
int pNum_;
|
||||
int tNum_;
|
||||
|
|
@ -527,7 +601,7 @@ class StackFrame {
|
|||
int P_;
|
||||
bool makeEpilog_;
|
||||
Xbyak::Reg64 pTbl_[4];
|
||||
Xbyak::Reg64 tTbl_[10];
|
||||
Xbyak::Reg64 tTbl_[maxRegNum];
|
||||
Pack p_;
|
||||
Pack t_;
|
||||
StackFrame(const StackFrame&);
|
||||
|
|
@ -539,7 +613,7 @@ public:
|
|||
make stack frame
|
||||
@param sf [in] this
|
||||
@param pNum [in] num of function parameter(0 <= pNum <= 4)
|
||||
@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
|
||||
@param tNum [in] num of temporary registers (0 <= tNum, with UseRCX, UseRDX); pNum + tNum [+ rcx] [+ rdx] must be <= 14
|
||||
@param stackSizeByte [in] local stack size
|
||||
@param makeEpilog [in] automatically call close() if true
|
||||
|
||||
|
|
@ -566,27 +640,17 @@ public:
|
|||
using namespace Xbyak;
|
||||
if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
|
||||
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
|
||||
if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
|
||||
if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
|
||||
const Reg64& _rsp = code->rsp;
|
||||
const AddressFrame& _ptr = code->ptr;
|
||||
saveNum_ = (std::max)(0, allRegNum - noSaveNum);
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
P_ = saveNum_ + (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->push(Reg64(tbl[i]));
|
||||
}
|
||||
P_ = (stackSizeByte + 7) / 8;
|
||||
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
|
||||
P_ *= 8;
|
||||
if (P_ > 0) code->sub(_rsp, P_);
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
|
||||
}
|
||||
#endif
|
||||
int pos = 0;
|
||||
for (int i = 0; i < pNum; i++) {
|
||||
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
|
||||
|
|
@ -607,21 +671,11 @@ public:
|
|||
{
|
||||
using namespace Xbyak;
|
||||
const Reg64& _rsp = code_->rsp;
|
||||
const AddressFrame& _ptr = code_->ptr;
|
||||
const int *tbl = getOrderTbl() + noSaveNum;
|
||||
#ifdef XBYAK64_WIN
|
||||
for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
|
||||
}
|
||||
for (int i = 4; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
|
||||
}
|
||||
#endif
|
||||
if (P_ > 0) code_->add(_rsp, P_);
|
||||
for (int i = 0; i < saveNum_; i++) {
|
||||
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
|
||||
}
|
||||
|
||||
if (callRet) code_->ret();
|
||||
}
|
||||
|
|
@ -633,9 +687,6 @@ public:
|
|||
} catch (std::exception& e) {
|
||||
printf("ERR:StackFrame %s\n", e.what());
|
||||
exit(1);
|
||||
} catch (...) {
|
||||
printf("ERR:StackFrame otherwise\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
private:
|
||||
|
|
@ -654,7 +705,7 @@ private:
|
|||
}
|
||||
int getRegIdx(int& pos) const
|
||||
{
|
||||
assert(pos < 14);
|
||||
assert(pos < maxRegNum);
|
||||
using namespace Xbyak;
|
||||
const int *tbl = getOrderTbl();
|
||||
int r = tbl[pos++];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue