mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2026-01-01 12:14:50 +01:00
Squashed 'externals/xbyak/' changes from d512551e..2794cde7
2794cde7 add xword, yword, etc. in Xbyak::util
fb9c04e4 fix document for vfpclassps
a51be78b fix test dependency
04fdfb1e update version
e6354f8b add vgf2p8mulb
09a12642 add gf2p8affineqb
d171ba0e add gf2p8affineinvqb
457f4fd0 add vpshufbitqmb
5af0ba39 add vpexpand{b,w}
e450f965 vpopcnt{d,q} supports ptr_b
48499eb1 add vpdpbusd(s), vpdpwssd(s)
9c745109 add vpdpbusd, vpdpbusds
0e1a11b4 add vpopcnt{b,w,d,q}
9acfc132 add vpshrd(v){w,d,q}
ac8de850 add vpshld(v){w,d,q}
f181c259 add vcompressb, vcompressw
5a402477 vpclmulqdq supports AVX-512
9e16b40b vaes* supports AVX-512
7fde08e0 add flags for intel's manual 319433-030.pdf
c5da3778 add test of v4fmaddps, vp4dpwssd, etc.
e4fc9d8a fix mpx encoding
d0b2fb62 add bnd(0xf2) prefix for MPX
f12b5678 use db for array
cd74ab44 remove bat file
git-subtree-dir: externals/xbyak
git-subtree-split: 2794cde79eb71e86490061cac9622ad0067b8d15
This commit is contained in:
parent
4ed09fda06
commit
9fb82036ca
16 changed files with 935 additions and 173 deletions
|
|
@ -1,8 +0,0 @@
|
|||
@echo off
|
||||
rm a.lst b.lst
|
||||
echo nasm
|
||||
nasm -l a.lst -f win64 test.asm
|
||||
cat a.lst
|
||||
echo yasm
|
||||
yasm -l b.lst -f win64 test.asm
|
||||
cat b.lst
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
TARGET = make_nm normalize_prefix jmp address nm_frame bad_address misc
|
||||
TARGET = make_nm normalize_prefix jmp address bad_address misc
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
BIT=32
|
||||
ifeq ($(shell uname -m),x86_64)
|
||||
BIT=64
|
||||
endif
|
||||
|
||||
ifeq ($(MODE_BIT),64)
|
||||
ifeq ($(BIT),64)
|
||||
TARGET += jmp64 address64
|
||||
endif
|
||||
|
||||
|
|
@ -28,14 +28,12 @@ address: address.cpp ../xbyak/xbyak.h
|
|||
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
|
||||
address64: address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
|
||||
nm_frame: nm_frame.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) nm_frame.cpp -o $@ -m32
|
||||
bad_address: bad_address.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||
misc: misc.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||
|
||||
test: normalize_prefix jmp bad_address
|
||||
test: normalize_prefix jmp bad_address $(TARGET)
|
||||
$(MAKE) -C ../gen
|
||||
./test_nm.sh
|
||||
./test_nm.sh Y
|
||||
|
|
@ -65,7 +63,7 @@ ifeq ($(BIT),64)
|
|||
./test_avx512.sh 64
|
||||
endif
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) lib_run
|
||||
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||
|
||||
lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
|
|
|
|||
|
|
@ -86,13 +86,15 @@ public:
|
|||
}
|
||||
fflush(stdout);
|
||||
if (msg.empty()) {
|
||||
int err = ngCount_ + exceptionCount_;
|
||||
int total = okCount_ + err;
|
||||
std::cout << "ctest:name=" << getBaseName(*argv)
|
||||
<< ", module=" << list_.size()
|
||||
<< ", total=" << (okCount_ + ngCount_ + exceptionCount_)
|
||||
<< ", total=" << total
|
||||
<< ", ok=" << okCount_
|
||||
<< ", ng=" << ngCount_
|
||||
<< ", exception=" << exceptionCount_ << std::endl;
|
||||
return 0;
|
||||
return err > 0 ? 1 : 0;
|
||||
} else {
|
||||
std::cout << msg << std::endl;
|
||||
return 1;
|
||||
|
|
@ -128,6 +130,15 @@ bool isEqual(const T& lhs, const U& rhs)
|
|||
return lhs == rhs;
|
||||
}
|
||||
|
||||
// avoid warning of comparison of integers of different signs
|
||||
// Compares an unsigned size with a signed int without a sign-compare warning.
// A negative rhs can never equal a size_t value, so it is rejected before the
// cast instead of being wrapped around to a huge unsigned number.
// @param lhs [in] unsigned value
// @param rhs [in] signed value
// @return true only if rhs is non-negative and has the same value as lhs
inline bool isEqual(size_t lhs, int rhs)
{
	return rhs >= 0 && lhs == static_cast<size_t>(rhs);
}
|
||||
// Compares a signed int with an unsigned size without a sign-compare warning.
// A negative lhs can never equal a size_t value, so it is rejected before the
// cast instead of being wrapped around to a huge unsigned number.
// @param lhs [in] signed value
// @param rhs [in] unsigned value
// @return true only if lhs is non-negative and has the same value as rhs
inline bool isEqual(int lhs, size_t rhs)
{
	return lhs >= 0 && static_cast<size_t>(lhs) == rhs;
}
|
||||
inline bool isEqual(const char *lhs, const char *rhs)
|
||||
{
|
||||
return strcmp(lhs, rhs) == 0;
|
||||
|
|
@ -188,9 +199,9 @@ int main(int argc, char *argv[])
|
|||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL(x, y) { \
|
||||
bool eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!eq) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
|
|
@ -201,22 +212,39 @@ int main(int argc, char *argv[])
|
|||
@param y [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_NEAR(x, y, eps) { \
|
||||
bool isNear = fabs((x) - (y)) < eps; \
|
||||
cybozu::test::test(isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!isNear) { \
|
||||
bool _cybozu_isNear = fabs((x) - (y)) < eps; \
|
||||
cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_isNear) { \
|
||||
std::cout << "ctest: lhs=" << (x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
|
||||
bool eq = x == y; \
|
||||
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!eq) { \
|
||||
bool _cybozu_eq = x == y; \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
|
||||
std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
|
||||
} \
|
||||
}
|
||||
/**
|
||||
alert if x[] != y[]
|
||||
@param x [in]
|
||||
@param y [in]
|
||||
@param n [in]
|
||||
*/
|
||||
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
|
||||
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
|
||||
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
|
||||
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
|
||||
if (!_cybozu_eq) { \
|
||||
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
|
||||
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
|
||||
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
always alert
|
||||
|
|
@ -229,25 +257,25 @@ int main(int argc, char *argv[])
|
|||
*/
|
||||
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
|
||||
{ \
|
||||
int ret = 0; \
|
||||
std::string errMsg; \
|
||||
int _cybozu_ret = 0; \
|
||||
std::string _cybozu_errMsg; \
|
||||
try { \
|
||||
statement; \
|
||||
ret = 1; \
|
||||
} catch (const Exception& e) { \
|
||||
errMsg = e.what(); \
|
||||
if (errMsg.find(msg) == std::string::npos) { \
|
||||
ret = 2; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception& _cybozu_e) { \
|
||||
_cybozu_errMsg = _cybozu_e.what(); \
|
||||
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
} catch (...) { \
|
||||
ret = 3; \
|
||||
_cybozu_ret = 3; \
|
||||
} \
|
||||
if (ret) { \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
|
||||
if (ret == 1) { \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else if (ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << errMsg << std::endl; \
|
||||
} else if (_cybozu_ret == 2) { \
|
||||
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
} \
|
||||
|
|
@ -258,17 +286,17 @@ int main(int argc, char *argv[])
|
|||
|
||||
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
|
||||
{ \
|
||||
int ret = 0; \
|
||||
int _cybozu_ret = 0; \
|
||||
try { \
|
||||
statement; \
|
||||
ret = 1; \
|
||||
_cybozu_ret = 1; \
|
||||
} catch (const Exception&) { \
|
||||
} catch (...) { \
|
||||
ret = 2; \
|
||||
_cybozu_ret = 2; \
|
||||
} \
|
||||
if (ret) { \
|
||||
if (_cybozu_ret) { \
|
||||
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
|
||||
if (ret == 1) { \
|
||||
if (_cybozu_ret == 1) { \
|
||||
std::cout << "ctest: no exception" << std::endl; \
|
||||
} else { \
|
||||
std::cout << "ctest: unexpected exception" << std::endl; \
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#define XBYAK_NO_OP_NAMES
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <cybozu/inttype.hpp>
|
||||
#include <cybozu/test.hpp>
|
||||
|
|
@ -401,7 +402,7 @@ CYBOZU_TEST_AUTO(test5)
|
|||
using namespace Xbyak;
|
||||
inLocalLabel();
|
||||
mov(ecx, count);
|
||||
xor(eax, eax);
|
||||
xor_(eax, eax);
|
||||
L(".lp");
|
||||
for (int i = 0; i < count; i++) {
|
||||
L(Label::toStr(i));
|
||||
|
|
|
|||
|
|
@ -1363,6 +1363,22 @@ class Test {
|
|||
put("bndmk", BNDREG, MEM);
|
||||
put("bndmov", BNDREG, BNDREG|MEM);
|
||||
put("bndstx", MEM, BNDREG);
|
||||
put("bndstx", "ptr [eax]", "[eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
|
||||
put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
|
||||
put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
|
||||
put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
|
||||
put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
|
||||
put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
|
||||
put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
|
||||
#ifdef XBYAK64
|
||||
put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
|
||||
put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
|
||||
put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
|
||||
#endif
|
||||
}
|
||||
void putFpuMem16_32() const
|
||||
{
|
||||
|
|
|
|||
580
test/misc.cpp
580
test/misc.cpp
|
|
@ -103,3 +103,583 @@ CYBOZU_TEST_AUTO(align)
|
|||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
CYBOZU_TEST_AUTO(vfmaddps)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
|
||||
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
|
||||
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
|
||||
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
|
||||
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
|
||||
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
|
||||
0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
|
||||
0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
|
||||
0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
|
||||
0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
|
||||
0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vaes)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
|
||||
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
|
||||
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
|
||||
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
|
||||
|
||||
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
|
||||
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
|
||||
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
|
||||
0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
|
||||
0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
|
||||
|
||||
0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
|
||||
0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
|
||||
0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpclmulqdq)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
|
||||
|
||||
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
|
||||
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
|
||||
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
|
||||
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vcompressb_w)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vcompressb(ptr[rax + 64], xmm1);
|
||||
vcompressb(xmm30 | k5, xmm1);
|
||||
vcompressb(ptr[rax + 64], ymm1);
|
||||
vcompressb(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressb(ptr[rax + 64], zmm1);
|
||||
vcompressb(zmm30 | k2 |T_z, zmm1);
|
||||
|
||||
vcompressw(ptr[rax + 64], xmm1);
|
||||
vcompressw(xmm30 | k5, xmm1);
|
||||
vcompressw(ptr[rax + 64], ymm1);
|
||||
vcompressw(ymm30 | k3 |T_z, ymm1);
|
||||
vcompressw(ptr[rax + 64], zmm1);
|
||||
vcompressw(zmm30 | k2 |T_z, zmm1);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
|
||||
0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
|
||||
0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
|
||||
0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(shld)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(shrd)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
|
||||
|
||||
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
|
||||
|
||||
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
|
||||
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
|
||||
|
||||
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
|
||||
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
|
||||
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
|
||||
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
|
||||
|
||||
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
|
||||
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpopcnt)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
|
||||
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vpdpbus)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
|
||||
|
||||
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
|
||||
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
|
||||
0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
|
||||
0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
|
||||
|
||||
0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
|
||||
0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
vpexpandb(xmm5|k3|T_z, xmm30);
|
||||
vpexpandb(ymm5|k3|T_z, ymm30);
|
||||
vpexpandb(zmm5|k3|T_z, zmm30);
|
||||
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpexpandw(xmm5|k3|T_z, xmm30);
|
||||
vpexpandw(ymm5|k3|T_z, ymm30);
|
||||
vpexpandw(zmm5|k3|T_z, zmm30);
|
||||
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
|
||||
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
|
||||
|
||||
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
|
||||
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
|
||||
0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
|
||||
|
||||
0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
|
||||
0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
|
||||
0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
|
||||
0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
|
||||
|
||||
0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
|
||||
0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
|
||||
0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
CYBOZU_TEST_AUTO(gf2)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
{
|
||||
///
|
||||
gf2p8affineinvqb(xmm1, xmm2, 3);
|
||||
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
///
|
||||
gf2p8affineqb(xmm1, xmm2, 3);
|
||||
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
|
||||
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
|
||||
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
|
||||
|
||||
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
|
||||
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
|
||||
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
|
||||
|
||||
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
|
||||
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
|
||||
///
|
||||
gf2p8mulb(xmm1, xmm2);
|
||||
gf2p8mulb(xmm1, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm1, xmm5, xmm2);
|
||||
vgf2p8mulb(ymm1, ymm5, ymm2);
|
||||
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
|
||||
|
||||
vgf2p8mulb(xmm30, xmm31, xmm4);
|
||||
vgf2p8mulb(ymm30, ymm31, ymm4);
|
||||
vgf2p8mulb(zmm30, zmm31, zmm4);
|
||||
|
||||
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
|
||||
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
|
||||
}
|
||||
} c;
|
||||
const uint8_t tbl[] = {
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
|
||||
0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
|
||||
0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
|
||||
0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
|
||||
0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
|
||||
0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
|
||||
0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
|
||||
|
||||
0x66, 0x0f, 0x38, 0xcf, 0xca,
|
||||
0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0xca,
|
||||
0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
|
||||
0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
|
||||
0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
|
||||
0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
|
||||
0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
|
||||
0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
|
||||
};
|
||||
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
|
||||
CYBOZU_TEST_EQUAL(c.getSize(), n);
|
||||
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ using namespace Xbyak;
|
|||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4245)
|
||||
#pragma warning(disable : 4312)
|
||||
#endif
|
||||
class Sample : public CodeGenerator {
|
||||
void operator=(const Sample&);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
@echo off
|
||||
set FILTER=cat
|
||||
set FILTER=grep -v warning
|
||||
if /i "%1"=="64" (
|
||||
set OPT2=-DXBYAK64
|
||||
set OPT3=win64
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue