Squashed 'externals/xbyak/' changes from d512551e..2794cde7

2794cde7 add xword, yword, etc. in Xbyak::util
fb9c04e4 fix document for vfpclassps
a51be78b fix test dependency
04fdfb1e update version
e6354f8b add vgf2p8mulb
09a12642 add gf2p8affineqb
d171ba0e add gf2p8affineinvqb
457f4fd0 add vpshufbitqmb
5af0ba39 add vpexpand{b,w}
e450f965 vpopcnt{d,q} supports ptr_b
48499eb1 add vpdpbusd(s), vpdpwssd(s)
9c745109 add vpdpbusd, vpdpbusds
0e1a11b4 add vpopcnt{b,w,d,q}
9acfc132 add vpshrd(v){w,d,q}
ac8de850 add vpshld(v){w,d,q}
f181c259 add vcompressb, vcompressw
5a402477 vpclmulqdq supports AVX-512
9e16b40b vaes* supports AVX-512
7fde08e0 add flags for intel's manual 319433-030.pdf
c5da3778 add test of v4fmaddps, vp4dpwssd, etc.
e4fc9d8a fix mpx encoding
d0b2fb62 add bnd(0xf2) prefix for MPX
f12b5678 use db for array
cd74ab44 remove bat file

git-subtree-dir: externals/xbyak
git-subtree-split: 2794cde79eb71e86490061cac9622ad0067b8d15
This commit is contained in:
MerryMage 2020-04-22 20:45:52 +01:00
parent 4ed09fda06
commit 9fb82036ca
16 changed files with 935 additions and 173 deletions

View file

@ -1,8 +0,0 @@
@echo off
rm a.lst b.lst
echo nasm
nasm -l a.lst -f win64 test.asm
cat a.lst
echo yasm
yasm -l b.lst -f win64 test.asm
cat b.lst

View file

@ -1,11 +1,11 @@
TARGET = make_nm normalize_prefix jmp address nm_frame bad_address misc
TARGET = make_nm normalize_prefix jmp address bad_address misc
XBYAK_INC=../xbyak/xbyak.h
BIT=32
ifeq ($(shell uname -m),x86_64)
BIT=64
endif
ifeq ($(MODE_BIT),64)
ifeq ($(BIT),64)
TARGET += jmp64 address64
endif
@ -28,14 +28,12 @@ address: address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) address.cpp -o $@ -m32
address64: address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) address.cpp -o $@ -m64
nm_frame: nm_frame.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) nm_frame.cpp -o $@ -m32
bad_address: bad_address.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) bad_address.cpp -o $@
misc: misc.cpp ../xbyak/xbyak.h
$(CXX) $(CFLAGS) misc.cpp -o $@
test: normalize_prefix jmp bad_address
test: normalize_prefix jmp bad_address $(TARGET)
$(MAKE) -C ../gen
./test_nm.sh
./test_nm.sh Y
@ -65,7 +63,7 @@ ifeq ($(BIT),64)
./test_avx512.sh 64
endif
clean:
rm -rf *.o $(TARGET) lib_run
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
lib_run: lib_test.cpp lib_run.cpp lib.h
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run

View file

@ -86,13 +86,15 @@ public:
}
fflush(stdout);
if (msg.empty()) {
int err = ngCount_ + exceptionCount_;
int total = okCount_ + err;
std::cout << "ctest:name=" << getBaseName(*argv)
<< ", module=" << list_.size()
<< ", total=" << (okCount_ + ngCount_ + exceptionCount_)
<< ", total=" << total
<< ", ok=" << okCount_
<< ", ng=" << ngCount_
<< ", exception=" << exceptionCount_ << std::endl;
return 0;
return err > 0 ? 1 : 0;
} else {
std::cout << msg << std::endl;
return 1;
@ -128,6 +130,15 @@ bool isEqual(const T& lhs, const U& rhs)
return lhs == rhs;
}
// size_t vs. int overload: the int is converted explicitly so the compiler
// does not warn about a comparison of integers of different signedness
inline bool isEqual(size_t lhs, int rhs)
{
	const size_t rhsAsSize = static_cast<size_t>(rhs);
	return rhsAsSize == lhs;
}
// int vs. size_t overload: the int is converted to size_t before comparing,
// which avoids a signed/unsigned comparison warning
inline bool isEqual(int lhs, size_t rhs)
{
	const size_t lhsAsSize = static_cast<size_t>(lhs);
	return rhs == lhsAsSize;
}
inline bool isEqual(const char *lhs, const char *rhs)
{
return strcmp(lhs, rhs) == 0;
@ -188,9 +199,9 @@ int main(int argc, char *argv[])
@param y [in]
*/
#define CYBOZU_TEST_EQUAL(x, y) { \
bool eq = cybozu::test::isEqual(x, y); \
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
if (!eq) { \
bool _cybozu_eq = cybozu::test::isEqual(x, y); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL", #x ", " #y, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: lhs=" << (x) << std::endl; \
std::cout << "ctest: rhs=" << (y) << std::endl; \
} \
@ -201,22 +212,39 @@ int main(int argc, char *argv[])
@param y [in]
*/
#define CYBOZU_TEST_NEAR(x, y, eps) { \
bool isNear = fabs((x) - (y)) < eps; \
cybozu::test::test(isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
if (!isNear) { \
bool _cybozu_isNear = fabs((x) - (y)) < eps; \
cybozu::test::test(_cybozu_isNear, "CYBOZU_TEST_NEAR", #x ", " #y, __FILE__, __LINE__); \
if (!_cybozu_isNear) { \
std::cout << "ctest: lhs=" << (x) << std::endl; \
std::cout << "ctest: rhs=" << (y) << std::endl; \
} \
}
#define CYBOZU_TEST_EQUAL_POINTER(x, y) { \
bool eq = x == y; \
cybozu::test::test(eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
if (!eq) { \
bool _cybozu_eq = x == y; \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_POINTER", #x ", " #y, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: lhs=" << static_cast<const void*>(x) << std::endl; \
std::cout << "ctest: rhs=" << static_cast<const void*>(y) << std::endl; \
} \
}
/**
alert if x[i] != y[i] for any i in [0, n)
every element is compared with cybozu::test::isEqual and reported through
cybozu::test::test individually, so one call records n test results;
on a mismatch the index and both elements are printed to std::cout
@param x [in] first sequence, accessed as (x)[i]
@param y [in] second sequence, accessed as (y)[i]
@param n [in] number of elements to compare (cast to size_t)
@note n is evaluated once; x and y are evaluated on every iteration
(and again when a mismatch is printed), so avoid side effects in them
*/
#define CYBOZU_TEST_EQUAL_ARRAY(x, y, n) { \
for (size_t _cybozu_test_i = 0, _cybozu_ie = (size_t)(n); _cybozu_test_i < _cybozu_ie; _cybozu_test_i++) { \
bool _cybozu_eq = cybozu::test::isEqual((x)[_cybozu_test_i], (y)[_cybozu_test_i]); \
cybozu::test::test(_cybozu_eq, "CYBOZU_TEST_EQUAL_ARRAY", #x ", " #y ", " #n, __FILE__, __LINE__); \
if (!_cybozu_eq) { \
std::cout << "ctest: i=" << _cybozu_test_i << std::endl; \
std::cout << "ctest: lhs=" << (x)[_cybozu_test_i] << std::endl; \
std::cout << "ctest: rhs=" << (y)[_cybozu_test_i] << std::endl; \
} \
} \
}
/**
always alert
@ -229,25 +257,25 @@ int main(int argc, char *argv[])
*/
#define CYBOZU_TEST_EXCEPTION_MESSAGE(statement, Exception, msg) \
{ \
int ret = 0; \
std::string errMsg; \
int _cybozu_ret = 0; \
std::string _cybozu_errMsg; \
try { \
statement; \
ret = 1; \
} catch (const Exception& e) { \
errMsg = e.what(); \
if (errMsg.find(msg) == std::string::npos) { \
ret = 2; \
_cybozu_ret = 1; \
} catch (const Exception& _cybozu_e) { \
_cybozu_errMsg = _cybozu_e.what(); \
if (_cybozu_errMsg.find(msg) == std::string::npos) { \
_cybozu_ret = 2; \
} \
} catch (...) { \
ret = 3; \
_cybozu_ret = 3; \
} \
if (ret) { \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION_MESSAGE", #statement ", " #Exception ", " #msg, __FILE__, __LINE__); \
if (ret == 1) { \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else if (ret == 2) { \
std::cout << "ctest: bad exception msg:" << errMsg << std::endl; \
} else if (_cybozu_ret == 2) { \
std::cout << "ctest: bad exception msg:" << _cybozu_errMsg << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \
} \
@ -258,17 +286,17 @@ int main(int argc, char *argv[])
#define CYBOZU_TEST_EXCEPTION(statement, Exception) \
{ \
int ret = 0; \
int _cybozu_ret = 0; \
try { \
statement; \
ret = 1; \
_cybozu_ret = 1; \
} catch (const Exception&) { \
} catch (...) { \
ret = 2; \
_cybozu_ret = 2; \
} \
if (ret) { \
if (_cybozu_ret) { \
cybozu::test::test(false, "CYBOZU_TEST_EXCEPTION", #statement ", " #Exception, __FILE__, __LINE__); \
if (ret == 1) { \
if (_cybozu_ret == 1) { \
std::cout << "ctest: no exception" << std::endl; \
} else { \
std::cout << "ctest: unexpected exception" << std::endl; \

View file

@ -1,6 +1,7 @@
#include <stdio.h>
#include <string.h>
#include <string>
#define XBYAK_NO_OP_NAMES
#include <xbyak/xbyak.h>
#include <cybozu/inttype.hpp>
#include <cybozu/test.hpp>
@ -401,7 +402,7 @@ CYBOZU_TEST_AUTO(test5)
using namespace Xbyak;
inLocalLabel();
mov(ecx, count);
xor(eax, eax);
xor_(eax, eax);
L(".lp");
for (int i = 0; i < count; i++) {
L(Label::toStr(i));

View file

@ -1363,6 +1363,22 @@ class Test {
put("bndmk", BNDREG, MEM);
put("bndmov", BNDREG, BNDREG|MEM);
put("bndstx", MEM, BNDREG);
put("bndstx", "ptr [eax]", "[eax]", BNDREG);
put("bndstx", "ptr [eax+5]", "[eax+5]", BNDREG);
put("bndstx", "ptr [eax+500]", "[eax+500]", BNDREG);
put("bndstx", "ptr [eax+ecx]", "[eax+ecx]", BNDREG);
put("bndstx", "ptr [ecx+eax]", "[ecx+eax]", BNDREG);
put("bndstx", "ptr [eax+esp]", "[eax+esp]", BNDREG);
put("bndstx", "ptr [esp+eax]", "[esp+eax]", BNDREG);
put("bndstx", "ptr [eax+ecx*2]", "[eax+ecx*2]", BNDREG);
put("bndstx", "ptr [ecx+ecx]", "[ecx+ecx]", BNDREG);
put("bndstx", "ptr [ecx*2]", "[ecx*2]", BNDREG);
put("bndstx", "ptr [eax+ecx*2+500]", "[eax+ecx*2+500]", BNDREG);
#ifdef XBYAK64
put("bndstx", "ptr [rax+rcx*2]", "[rax+rcx*2]", BNDREG);
put("bndstx", "ptr [r9*2]", "[r9*2]", BNDREG);
put("bndstx", "ptr [r9*2+r15]", "[r9*2+r15]", BNDREG);
#endif
}
void putFpuMem16_32() const
{

View file

@ -103,3 +103,583 @@ CYBOZU_TEST_AUTO(align)
}
} c;
}
#ifdef XBYAK64
// Encoding test for v4fmaddps/ss, v4fnmaddps/ss and vp4dpwssd(s):
// the Code constructor emits six instructions, then the generated bytes are
// compared (length first, then content) against the fixed table tbl.
CYBOZU_TEST_AUTO(vfmaddps)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// plain, opmask (k5/k7/k4) and zeroing (T_z) destinations are covered
v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
}
} c;
// expected bytes, laid out one row per instruction above, in emission order
const uint8_t tbl[] = {
0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vaesdec, vaesdeclast, vaesenc and vaesenclast with
// xmm, ymm and zmm operands (including registers numbered 20 and 30).
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vaes)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// vaesdec: xmm / ymm / zmm
vaesdec(xmm20, xmm30, ptr [rcx + 64]);
vaesdec(ymm1, ymm2, ptr [rcx + 64]);
vaesdec(zmm1, zmm2, ptr [rcx + 64]);
// vaesdeclast: xmm / ymm / zmm
vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
vaesdeclast(zmm20, zmm30, ptr [rax + 64]);
// vaesenc: xmm / ymm / zmm
vaesenc(xmm20, xmm30, ptr [rcx + 64]);
vaesenc(ymm1, ymm2, ptr [rcx + 64]);
vaesenc(zmm1, zmm2, ptr [rcx + 64]);
// vaesenclast: xmm / ymm / zmm
vaesenclast(xmm20, xmm30, ptr [rax + 64]);
vaesenclast(ymm20, ymm30, ptr [rax + 64]);
vaesenclast(zmm20, zmm30, ptr [rax + 64]);
}
} c;
// expected bytes, one row per instruction above, in emission order;
// the two ymm1/ymm2 rows start with 0xC4 and are one byte shorter, all other
// rows start with 0x62 (presumably VEX vs. EVEX encodings - confirm against the manual)
const uint8_t tbl[] = {
0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,
0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpclmulqdq with xmm/ymm/zmm destinations, once with
// register 2 and once with register 20 as destination, always with the
// immediate 3. The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vpclmulqdq)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// destination register 2
vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);
// destination register 20
vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
}
} c;
// expected bytes, one row per instruction above, in emission order;
// every row ends with the immediate 0x03
const uint8_t tbl[] = {
0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vcompressb and vcompressw: for each of xmm/ymm/zmm a
// store to memory and a masked register destination are emitted.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vcompressb_w)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// vcompressb: memory destination, then masked register destination
vcompressb(ptr[rax + 64], xmm1);
vcompressb(xmm30 | k5, xmm1);
vcompressb(ptr[rax + 64], ymm1);
vcompressb(ymm30 | k3 |T_z, ymm1);
vcompressb(ptr[rax + 64], zmm1);
vcompressb(zmm30 | k2 |T_z, zmm1);
// vcompressw: same operand patterns
vcompressw(ptr[rax + 64], xmm1);
vcompressw(xmm30 | k5, xmm1);
vcompressw(ptr[rax + 64], ymm1);
vcompressw(ymm30 | k3 |T_z, ymm1);
vcompressw(ptr[rax + 64], zmm1);
vcompressw(zmm30 | k2 |T_z, zmm1);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// vcompressb
0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,
// vcompressw
0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpshld{w,d,q} (immediate count 5) and vpshldv{w,d,q}
// with xmm/ymm/zmm destinations, all using mask k3 with T_z and the memory
// operand [rax + 0x40]. The generated bytes are compared against tbl.
CYBOZU_TEST_AUTO(shld)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// immediate forms: w, d, q
vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
// "v" forms without immediate: w, d, q
vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// vpshldw / vpshldd / vpshldq (rows end with the immediate 0x05)
0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,
0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,
0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,
// vpshldvw / vpshldvd / vpshldvq
0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,
0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,
0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpshrd{w,d,q} (immediate count 5) and vpshrdv{w,d,q}
// with xmm/ymm/zmm destinations, mask k3 with T_z and [rax + 0x40].
// The d/q variants are additionally emitted with a ptr_b memory operand.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(shrd)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// immediate forms with ptr: w, d, q
vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);
// "v" forms with ptr: w, d, q
vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
// immediate forms with ptr_b: d, q only
vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);
// "v" forms with ptr_b: d, q only
vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// vpshrdw / vpshrdd / vpshrdq with ptr (rows end with the immediate 0x05)
0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,
0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,
0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,
// vpshrdvw / vpshrdvd / vpshrdvq with ptr
0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,
0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,
0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,
// vpshrdd / vpshrdq with ptr_b (rows end with the immediate 0x05)
0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,
0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,
// vpshrdvd / vpshrdvq with ptr_b
0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,
0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpopcnt{b,w,d,q} with xmm/ymm/zmm destinations, mask k3
// with T_z and the memory operand [rax + 0x40]; the d and q variants are
// emitted with both ptr and ptr_b operands.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vpopcnt)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// vpopcntb, vpopcntw: ptr only
vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);
// vpopcntd: ptr, then ptr_b
vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);
// vpopcntq: ptr, then ptr_b
vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);
vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// vpopcntb
0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,
// vpopcntw
0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,
// vpopcntd: ptr, then ptr_b
0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,
// vpopcntq: ptr, then ptr_b
0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,
0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds with
// xmm/ymm/zmm destinations, mask k3 with T_z, second source register 20 and
// the memory operand [rax + 0x40], each with both ptr and ptr_b operands.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vpdpbus)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// vpdpbusd: ptr, then ptr_b
vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
// vpdpbusds: ptr, then ptr_b
vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
// vpdpwssd: ptr, then ptr_b
vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
// vpdpwssds: ptr, then ptr_b
vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);
vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order;
// the fifth byte of each row is 0x50..0x53 for the four mnemonics in turn
const uint8_t tbl[] = {
// vpdpbusd
0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,
// vpdpbusds
0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,
// vpdpwssd
0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,
// vpdpwssds
0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,
0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for vpexpandb, vpexpandw (register and memory sources, mask
// k3 with T_z) and vpshufbitqmb (destination k1 masked by k2) with
// xmm/ymm/zmm operands.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
// vpexpandb: register source, then memory source
vpexpandb(xmm5|k3|T_z, xmm30);
vpexpandb(ymm5|k3|T_z, ymm30);
vpexpandb(zmm5|k3|T_z, zmm30);
vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);
// vpexpandw: register source, then memory source
vpexpandw(xmm5|k3|T_z, xmm30);
vpexpandw(ymm5|k3|T_z, ymm30);
vpexpandw(zmm5|k3|T_z, zmm30);
vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);
// vpshufbitqmb: opmask destination
vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// vpexpandb
0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,
// vpexpandw
0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,
// vpshufbitqmb
0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
// Encoding test for the GF(2^8) instructions: gf2p8affineinvqb,
// gf2p8affineqb and gf2p8mulb together with their v-prefixed forms.
// Each family is emitted with two-operand xmm forms, three-operand xmm/ymm
// forms, forms using registers 30/31 (xmm/ymm/zmm) and masked forms
// (k1 with T_z); the two affine families also use ptr_b operands.
// The generated bytes are compared against the fixed table tbl.
CYBOZU_TEST_AUTO(gf2)
{
struct Code : Xbyak::CodeGenerator {
Code()
{
/// gf2p8affineinvqb / vgf2p8affineinvqb (with immediate)
gf2p8affineinvqb(xmm1, xmm2, 3);
gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
/// gf2p8affineqb / vgf2p8affineqb (with immediate)
gf2p8affineqb(xmm1, xmm2, 3);
gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);
vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
vgf2p8affineqb(zmm30, zmm31, zmm4, 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);
vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);
/// gf2p8mulb / vgf2p8mulb (no immediate, no ptr_b forms)
gf2p8mulb(xmm1, xmm2);
gf2p8mulb(xmm1, ptr [rax + 0x40]);
vgf2p8mulb(xmm1, xmm5, xmm2);
vgf2p8mulb(ymm1, ymm5, ymm2);
vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(xmm30, xmm31, xmm4);
vgf2p8mulb(ymm30, ymm31, ymm4);
vgf2p8mulb(zmm30, zmm31, zmm4);
vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
}
} c;
// expected bytes, one row per instruction above, in emission order
const uint8_t tbl[] = {
// gf2p8affineinvqb / vgf2p8affineinvqb (rows end with the immediate)
0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,
// gf2p8affineqb / vgf2p8affineqb (rows end with the immediate)
0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,
// gf2p8mulb / vgf2p8mulb
0x66, 0x0f, 0x38, 0xcf, 0xca,
0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
0xc4, 0xe2, 0x51, 0xcf, 0xca,
0xc4, 0xe2, 0x55, 0xcf, 0xca,
0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
};
// n = number of expected bytes
const size_t n = sizeof(tbl) / sizeof(tbl[0]);
// total size must match before the byte-by-byte comparison
CYBOZU_TEST_EQUAL(c.getSize(), n);
CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
#endif

View file

@ -6,6 +6,7 @@ using namespace Xbyak;
#ifdef _MSC_VER
#pragma warning(disable : 4245)
#pragma warning(disable : 4312)
#endif
class Sample : public CodeGenerator {
void operator=(const Sample&);

View file

@ -1,5 +1,5 @@
@echo off
set FILTER=cat
set FILTER=grep -v warning
if /i "%1"=="64" (
set OPT2=-DXBYAK64
set OPT3=win64