mirror of
https://git.suyu.dev/suyu/dynarmic.git
synced 2025-12-24 16:24:47 +01:00
Squashed 'externals/xbyak/' changes from d512551e..2794cde7
2794cde7 add xword, yword, etc. in Xbyak::util
fb9c04e4 fix document for vfpclassps
a51be78b fix test dependency
04fdfb1e update version
e6354f8b add vgf2p8mulb
09a12642 add gf2p8affineqb
d171ba0e add gf2p8affineinvqb
457f4fd0 add vpshufbitqmb
5af0ba39 add vpexpand{b,w}
e450f965 vpopcnt{d,q} supports ptr_b
48499eb1 add vpdpbusd(s), vpdpwssd(s)
9c745109 add vpdpbusd, vpdpbusds
0e1a11b4 add vpopcnt{b,w,d,q}
9acfc132 add vpshrd(v){w,d,q}
ac8de850 add vpshld(v){w,d,q}
f181c259 add vcompressb, vcompressw
5a402477 vpclmulqdq supports AVX-512
9e16b40b vaes* supports AVX-512
7fde08e0 add flags for intel's manual 319433-030.pdf
c5da3778 add test of v4fmaddps, vp4dpwssd, etc.
e4fc9d8a fix mpx encoding
d0b2fb62 add bnd(0xf2) prefix for MPX
f12b5678 use db for array
cd74ab44 remove bat file
git-subtree-dir: externals/xbyak
git-subtree-split: 2794cde79eb71e86490061cac9622ad0067b8d15
This commit is contained in:
parent
4ed09fda06
commit
9fb82036ca
16 changed files with 935 additions and 173 deletions
580
test/misc.cpp
580
test/misc.cpp
|
|
@ -103,3 +103,583 @@ CYBOZU_TEST_AUTO(align)
|
|||
}
|
||||
} c;
|
||||
}
|
||||
|
||||
#ifdef XBYAK64
|
||||
// Verifies the bytes emitted for v4fmaddps, v4fmaddss, v4fnmaddps,
// v4fnmaddss, vp4dpwssd and vp4dpwssds against a fixed reference table.
CYBOZU_TEST_AUTO(vfmaddps)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			v4fmaddps(zmm1, zmm8, ptr [rdx + 64]);
			v4fmaddss(xmm15, xmm8, ptr [rax + 64]);
			v4fnmaddps(zmm5 | k5, zmm2, ptr [rcx + 0x80]);
			v4fnmaddss(xmm31, xmm2, ptr [rsp + 0x80]);
			vp4dpwssd(zmm23 | k7 | T_z, zmm1, ptr [rax + 64]);
			vp4dpwssds(zmm10 | k4, zmm3, ptr [rsp + rax * 4 + 64]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		0x62, 0xf2, 0x3f, 0x48, 0x9a, 0x4a, 0x04,
		0x62, 0x72, 0x3f, 0x08, 0x9b, 0x78, 0x04,
		0x62, 0xf2, 0x6f, 0x4d, 0xaa, 0x69, 0x08,
		0x62, 0x62, 0x6f, 0x08, 0xab, 0x7c, 0x24, 0x08,
		0x62, 0xe2, 0x77, 0xcf, 0x52, 0x78, 0x04,
		0x62, 0x72, 0x67, 0x4c, 0x53, 0x54, 0x84, 0x04,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vaesdec, vaesdeclast, vaesenc and
// vaesenclast with xmm, ymm and zmm operands against a fixed reference table.
CYBOZU_TEST_AUTO(vaes)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vaesdec(xmm20, xmm30, ptr [rcx + 64]);
			vaesdec(ymm1, ymm2, ptr [rcx + 64]);
			vaesdec(zmm1, zmm2, ptr [rcx + 64]);

			vaesdeclast(xmm20, xmm30, ptr [rax + 64]);
			vaesdeclast(ymm20, ymm30, ptr [rax + 64]);
			vaesdeclast(zmm20, zmm30, ptr [rax + 64]);

			vaesenc(xmm20, xmm30, ptr [rcx + 64]);
			vaesenc(ymm1, ymm2, ptr [rcx + 64]);
			vaesenc(zmm1, zmm2, ptr [rcx + 64]);

			vaesenclast(xmm20, xmm30, ptr [rax + 64]);
			vaesenclast(ymm20, ymm30, ptr [rax + 64]);
			vaesenclast(zmm20, zmm30, ptr [rax + 64]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vaesdec
		0x62, 0xE2, 0x0D, 0x00, 0xDE, 0x61, 0x04,
		0xC4, 0xE2, 0x6D, 0xDE, 0x49, 0x40,
		0x62, 0xF2, 0x6D, 0x48, 0xDE, 0x49, 0x01,

		// vaesdeclast
		0x62, 0xE2, 0x0D, 0x00, 0xDF, 0x60, 0x04,
		0x62, 0xE2, 0x0D, 0x20, 0xDF, 0x60, 0x02,
		0x62, 0xE2, 0x0D, 0x40, 0xDF, 0x60, 0x01,

		// vaesenc
		0x62, 0xE2, 0x0D, 0x00, 0xDC, 0x61, 0x04,
		0xC4, 0xE2, 0x6D, 0xDC, 0x49, 0x40,
		0x62, 0xF2, 0x6D, 0x48, 0xDC, 0x49, 0x01,

		// vaesenclast
		0x62, 0xE2, 0x0D, 0x00, 0xDD, 0x60, 0x04,
		0x62, 0xE2, 0x0D, 0x20, 0xDD, 0x60, 0x02,
		0x62, 0xE2, 0x0D, 0x40, 0xDD, 0x60, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpclmulqdq with xmm, ymm and zmm operands,
// using both low (2) and high (20) destination registers.
CYBOZU_TEST_AUTO(vpclmulqdq)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpclmulqdq(xmm2, xmm3, ptr [rax + 64], 3);
			vpclmulqdq(ymm2, ymm3, ptr [rax + 64], 3);
			vpclmulqdq(zmm2, zmm3, ptr [rax + 64], 3);

			vpclmulqdq(xmm20, xmm3, ptr [rax + 64], 3);
			vpclmulqdq(ymm20, ymm3, ptr [rax + 64], 3);
			vpclmulqdq(zmm20, zmm3, ptr [rax + 64], 3);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		0xc4, 0xe3, 0x61, 0x44, 0x50, 0x40, 0x03,
		0xc4, 0xe3, 0x65, 0x44, 0x50, 0x40, 0x03,
		0x62, 0xf3, 0x65, 0x48, 0x44, 0x50, 0x01, 0x03,
		0x62, 0xe3, 0x65, 0x08, 0x44, 0x60, 0x04, 0x03,
		0x62, 0xe3, 0x65, 0x28, 0x44, 0x60, 0x02, 0x03,
		0x62, 0xe3, 0x65, 0x48, 0x44, 0x60, 0x01, 0x03,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vcompressb and vcompressw with memory and
// masked-register destinations, for xmm, ymm and zmm sources.
CYBOZU_TEST_AUTO(vcompressb_w)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vcompressb(ptr[rax + 64], xmm1);
			vcompressb(xmm30 | k5, xmm1);
			vcompressb(ptr[rax + 64], ymm1);
			vcompressb(ymm30 | k3 | T_z, ymm1);
			vcompressb(ptr[rax + 64], zmm1);
			vcompressb(zmm30 | k2 | T_z, zmm1);

			vcompressw(ptr[rax + 64], xmm1);
			vcompressw(xmm30 | k5, xmm1);
			vcompressw(ptr[rax + 64], ymm1);
			vcompressw(ymm30 | k3 | T_z, ymm1);
			vcompressw(ptr[rax + 64], zmm1);
			vcompressw(zmm30 | k2 | T_z, zmm1);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vcompressb
		0x62, 0xf2, 0x7d, 0x08, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0x0d, 0x63, 0xce,
		0x62, 0xf2, 0x7d, 0x28, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0xab, 0x63, 0xce,
		0x62, 0xf2, 0x7d, 0x48, 0x63, 0x48, 0x40,
		0x62, 0x92, 0x7d, 0xca, 0x63, 0xce,

		// vcompressw
		0x62, 0xf2, 0xfd, 0x08, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0x0d, 0x63, 0xce,
		0x62, 0xf2, 0xfd, 0x28, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0xab, 0x63, 0xce,
		0x62, 0xf2, 0xfd, 0x48, 0x63, 0x48, 0x20,
		0x62, 0x92, 0xfd, 0xca, 0x63, 0xce,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpshld{w,d,q} (immediate count) and
// vpshldv{w,d,q} with masked, zeroing xmm/ymm/zmm destinations.
CYBOZU_TEST_AUTO(shld)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpshldw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshldq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshldq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshldvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshldvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshldvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshldvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshldvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vpshldw
		0x62, 0xf3, 0xed, 0x8b, 0x70, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x70, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x70, 0x68, 0x01, 0x05,

		// vpshldd
		0x62, 0xf3, 0x6d, 0x8b, 0x71, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0x6d, 0xab, 0x71, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0x6d, 0xcb, 0x71, 0x68, 0x01, 0x05,

		// vpshldq
		0x62, 0xf3, 0xed, 0x8b, 0x71, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x71, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x71, 0x68, 0x01, 0x05,

		// vpshldvw
		0x62, 0xf2, 0xed, 0x8b, 0x70, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x70, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x70, 0x68, 0x01,

		// vpshldvd
		0x62, 0xf2, 0x6d, 0x8b, 0x71, 0x68, 0x04,
		0x62, 0xf2, 0x6d, 0xab, 0x71, 0x68, 0x02,
		0x62, 0xf2, 0x6d, 0xcb, 0x71, 0x68, 0x01,

		// vpshldvq
		0x62, 0xf2, 0xed, 0x8b, 0x71, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x71, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x71, 0x68, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpshrd{w,d,q} (immediate count) and
// vpshrdv{w,d,q} with masked, zeroing xmm/ymm/zmm destinations, including
// the ptr_b memory-operand forms of the d/q variants.
CYBOZU_TEST_AUTO(shrd)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpshrdw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40], 5);
			vpshrdq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40], 5);
			vpshrdq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40], 5);

			vpshrdvw(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvw(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvw(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshrdvd(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvd(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvd(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshrdvq(xmm5|k3|T_z, xmm2, ptr [rax + 0x40]);
			vpshrdvq(ymm5|k3|T_z, ymm2, ptr [rax + 0x40]);
			vpshrdvq(zmm5|k3|T_z, zmm2, ptr [rax + 0x40]);

			vpshrdd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
			vpshrdd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
			vpshrdd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);

			vpshrdq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40], 5);
			vpshrdq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40], 5);
			vpshrdq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40], 5);

			vpshrdvd(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
			vpshrdvd(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
			vpshrdvd(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);

			vpshrdvq(xmm5|k3|T_z, xmm2, ptr_b [rax + 0x40]);
			vpshrdvq(ymm5|k3|T_z, ymm2, ptr_b [rax + 0x40]);
			vpshrdvq(zmm5|k3|T_z, zmm2, ptr_b [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vpshrdw
		0x62, 0xf3, 0xed, 0x8b, 0x72, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x72, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x72, 0x68, 0x01, 0x05,

		// vpshrdd
		0x62, 0xf3, 0x6d, 0x8b, 0x73, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0x6d, 0xab, 0x73, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0x6d, 0xcb, 0x73, 0x68, 0x01, 0x05,

		// vpshrdq
		0x62, 0xf3, 0xed, 0x8b, 0x73, 0x68, 0x04, 0x05,
		0x62, 0xf3, 0xed, 0xab, 0x73, 0x68, 0x02, 0x05,
		0x62, 0xf3, 0xed, 0xcb, 0x73, 0x68, 0x01, 0x05,

		// vpshrdvw
		0x62, 0xf2, 0xed, 0x8b, 0x72, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x72, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x72, 0x68, 0x01,

		// vpshrdvd
		0x62, 0xf2, 0x6d, 0x8b, 0x73, 0x68, 0x04,
		0x62, 0xf2, 0x6d, 0xab, 0x73, 0x68, 0x02,
		0x62, 0xf2, 0x6d, 0xcb, 0x73, 0x68, 0x01,

		// vpshrdvq
		0x62, 0xf2, 0xed, 0x8b, 0x73, 0x68, 0x04,
		0x62, 0xf2, 0xed, 0xab, 0x73, 0x68, 0x02,
		0x62, 0xf2, 0xed, 0xcb, 0x73, 0x68, 0x01,

		// vpshrdd, ptr_b operand
		0x62, 0xf3, 0x6d, 0x9b, 0x73, 0x68, 0x10, 0x05,
		0x62, 0xf3, 0x6d, 0xbb, 0x73, 0x68, 0x10, 0x05,
		0x62, 0xf3, 0x6d, 0xdb, 0x73, 0x68, 0x10, 0x05,

		// vpshrdq, ptr_b operand
		0x62, 0xf3, 0xed, 0x9b, 0x73, 0x68, 0x08, 0x05,
		0x62, 0xf3, 0xed, 0xbb, 0x73, 0x68, 0x08, 0x05,
		0x62, 0xf3, 0xed, 0xdb, 0x73, 0x68, 0x08, 0x05,

		// vpshrdvd, ptr_b operand
		0x62, 0xf2, 0x6d, 0x9b, 0x73, 0x68, 0x10,
		0x62, 0xf2, 0x6d, 0xbb, 0x73, 0x68, 0x10,
		0x62, 0xf2, 0x6d, 0xdb, 0x73, 0x68, 0x10,

		// vpshrdvq, ptr_b operand
		0x62, 0xf2, 0xed, 0x9b, 0x73, 0x68, 0x08,
		0x62, 0xf2, 0xed, 0xbb, 0x73, 0x68, 0x08,
		0x62, 0xf2, 0xed, 0xdb, 0x73, 0x68, 0x08,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpopcnt{b,w,d,q} with masked, zeroing
// xmm/ymm/zmm destinations, including the ptr_b forms of the d/q variants.
CYBOZU_TEST_AUTO(vpopcnt)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpopcntb(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntb(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntb(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntw(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntw(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntw(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntd(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntd(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntd(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntd(xmm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntd(ymm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntd(zmm5|k3|T_z, ptr_b [rax + 0x40]);

			vpopcntq(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntq(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpopcntq(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpopcntq(xmm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntq(ymm5|k3|T_z, ptr_b [rax + 0x40]);
			vpopcntq(zmm5|k3|T_z, ptr_b [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vpopcntb
		0x62, 0xf2, 0x7d, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x54, 0x68, 0x01,

		// vpopcntw
		0x62, 0xf2, 0xfd, 0x8b, 0x54, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x54, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x54, 0x68, 0x01,

		// vpopcntd
		0x62, 0xf2, 0x7d, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0x7d, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0x7d, 0xcb, 0x55, 0x68, 0x01,

		// vpopcntd, ptr_b operand
		0x62, 0xf2, 0x7d, 0x9b, 0x55, 0x68, 0x10,
		0x62, 0xf2, 0x7d, 0xbb, 0x55, 0x68, 0x10,
		0x62, 0xf2, 0x7d, 0xdb, 0x55, 0x68, 0x10,

		// vpopcntq
		0x62, 0xf2, 0xfd, 0x8b, 0x55, 0x68, 0x04,
		0x62, 0xf2, 0xfd, 0xab, 0x55, 0x68, 0x02,
		0x62, 0xf2, 0xfd, 0xcb, 0x55, 0x68, 0x01,

		// vpopcntq, ptr_b operand
		0x62, 0xf2, 0xfd, 0x9b, 0x55, 0x68, 0x08,
		0x62, 0xf2, 0xfd, 0xbb, 0x55, 0x68, 0x08,
		0x62, 0xf2, 0xfd, 0xdb, 0x55, 0x68, 0x08,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpdpbusd, vpdpbusds, vpdpwssd and vpdpwssds
// with masked, zeroing xmm/ymm/zmm destinations, for both ptr and ptr_b
// memory operands.
CYBOZU_TEST_AUTO(vpdpbus)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpdpbusd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpbusd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpbusd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpbusd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpbusd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpbusd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpbusds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpbusds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpbusds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpbusds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpbusds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpbusds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpwssd(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpwssd(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpwssd(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpwssd(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpwssd(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpwssd(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);

			vpdpwssds(xmm5|k3|T_z, xmm20, ptr [rax + 0x40]);
			vpdpwssds(ymm5|k3|T_z, ymm20, ptr [rax + 0x40]);
			vpdpwssds(zmm5|k3|T_z, zmm20, ptr [rax + 0x40]);

			vpdpwssds(xmm5|k3|T_z, xmm20, ptr_b [rax + 0x40]);
			vpdpwssds(ymm5|k3|T_z, ymm20, ptr_b [rax + 0x40]);
			vpdpwssds(zmm5|k3|T_z, zmm20, ptr_b [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vpdpbusd
		0x62, 0xf2, 0x5d, 0x83, 0x50, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x50, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x50, 0x68, 0x01,

		// vpdpbusd, ptr_b operand
		0x62, 0xf2, 0x5d, 0x93, 0x50, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x50, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x50, 0x68, 0x10,

		// vpdpbusds
		0x62, 0xf2, 0x5d, 0x83, 0x51, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x51, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x51, 0x68, 0x01,

		// vpdpbusds, ptr_b operand
		0x62, 0xf2, 0x5d, 0x93, 0x51, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x51, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x51, 0x68, 0x10,

		// vpdpwssd
		0x62, 0xf2, 0x5d, 0x83, 0x52, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x52, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x52, 0x68, 0x01,

		// vpdpwssd, ptr_b operand
		0x62, 0xf2, 0x5d, 0x93, 0x52, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x52, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x52, 0x68, 0x10,

		// vpdpwssds
		0x62, 0xf2, 0x5d, 0x83, 0x53, 0x68, 0x04,
		0x62, 0xf2, 0x5d, 0xa3, 0x53, 0x68, 0x02,
		0x62, 0xf2, 0x5d, 0xc3, 0x53, 0x68, 0x01,

		// vpdpwssds, ptr_b operand
		0x62, 0xf2, 0x5d, 0x93, 0x53, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xb3, 0x53, 0x68, 0x10,
		0x62, 0xf2, 0x5d, 0xd3, 0x53, 0x68, 0x10,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for vpexpandb and vpexpandw (register and memory
// sources) and for vpshufbitqmb with a masked opmask-register destination.
CYBOZU_TEST_AUTO(vexpand_vpshufbitqmb)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			vpexpandb(xmm5|k3|T_z, xmm30);
			vpexpandb(ymm5|k3|T_z, ymm30);
			vpexpandb(zmm5|k3|T_z, zmm30);
			vpexpandb(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandb(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandb(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpexpandw(xmm5|k3|T_z, xmm30);
			vpexpandw(ymm5|k3|T_z, ymm30);
			vpexpandw(zmm5|k3|T_z, zmm30);
			vpexpandw(xmm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandw(ymm5|k3|T_z, ptr [rax + 0x40]);
			vpexpandw(zmm5|k3|T_z, ptr [rax + 0x40]);

			vpshufbitqmb(k1|k2, xmm2, ptr [rax + 0x40]);
			vpshufbitqmb(k1|k2, ymm2, ptr [rax + 0x40]);
			vpshufbitqmb(k1|k2, zmm2, ptr [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// vpexpandb
		0x62, 0x92, 0x7d, 0x8b, 0x62, 0xee,
		0x62, 0x92, 0x7d, 0xab, 0x62, 0xee,
		0x62, 0x92, 0x7d, 0xcb, 0x62, 0xee,
		0x62, 0xf2, 0x7d, 0x8b, 0x62, 0x68, 0x40,
		0x62, 0xf2, 0x7d, 0xab, 0x62, 0x68, 0x40,
		0x62, 0xf2, 0x7d, 0xcb, 0x62, 0x68, 0x40,

		// vpexpandw
		0x62, 0x92, 0xfd, 0x8b, 0x62, 0xee,
		0x62, 0x92, 0xfd, 0xab, 0x62, 0xee,
		0x62, 0x92, 0xfd, 0xcb, 0x62, 0xee,
		0x62, 0xf2, 0xfd, 0x8b, 0x62, 0x68, 0x20,
		0x62, 0xf2, 0xfd, 0xab, 0x62, 0x68, 0x20,
		0x62, 0xf2, 0xfd, 0xcb, 0x62, 0x68, 0x20,

		// vpshufbitqmb
		0x62, 0xf2, 0x6d, 0x0a, 0x8f, 0x48, 0x04,
		0x62, 0xf2, 0x6d, 0x2a, 0x8f, 0x48, 0x02,
		0x62, 0xf2, 0x6d, 0x4a, 0x8f, 0x48, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
// Verifies the bytes emitted for gf2p8affineinvqb, gf2p8affineqb and gf2p8mulb
// together with their v-prefixed forms (xmm/ymm/zmm, masked destinations, and
// ptr_b operands for the affine variants) against a fixed reference table.
CYBOZU_TEST_AUTO(gf2)
{
	// The constructor emits one instruction per row of tbl, in the same order.
	struct Gen : Xbyak::CodeGenerator {
		Gen() {
			// gf2p8affineinvqb / vgf2p8affineinvqb
			gf2p8affineinvqb(xmm1, xmm2, 3);
			gf2p8affineinvqb(xmm1, ptr [rax + 0x40], 3);

			vgf2p8affineinvqb(xmm1, xmm5, xmm2, 3);
			vgf2p8affineinvqb(ymm1, ymm5, ymm2, 3);
			vgf2p8affineinvqb(xmm1, xmm5, ptr [rax + 0x40], 3);
			vgf2p8affineinvqb(ymm1, ymm5, ptr [rax + 0x40], 3);

			vgf2p8affineinvqb(xmm30, xmm31, xmm4, 5);
			vgf2p8affineinvqb(ymm30, ymm31, ymm4, 5);
			vgf2p8affineinvqb(zmm30, zmm31, zmm4, 5);

			vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
			vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
			vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);

			vgf2p8affineinvqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineinvqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineinvqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);

			// gf2p8affineqb / vgf2p8affineqb
			gf2p8affineqb(xmm1, xmm2, 3);
			gf2p8affineqb(xmm1, ptr [rax + 0x40], 3);

			vgf2p8affineqb(xmm1, xmm5, xmm2, 3);
			vgf2p8affineqb(ymm1, ymm5, ymm2, 3);
			vgf2p8affineqb(xmm1, xmm5, ptr [rax + 0x40], 3);
			vgf2p8affineqb(ymm1, ymm5, ptr [rax + 0x40], 3);

			vgf2p8affineqb(xmm30, xmm31, xmm4, 5);
			vgf2p8affineqb(ymm30, ymm31, ymm4, 5);
			vgf2p8affineqb(zmm30, zmm31, zmm4, 5);

			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40], 5);
			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40], 5);
			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40], 5);

			vgf2p8affineqb(xmm30|k1|T_z, xmm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineqb(ymm30|k1|T_z, ymm5, ptr_b [rax + 0x40], 5);
			vgf2p8affineqb(zmm30|k1|T_z, zmm5, ptr_b [rax + 0x40], 5);

			// gf2p8mulb / vgf2p8mulb
			gf2p8mulb(xmm1, xmm2);
			gf2p8mulb(xmm1, ptr [rax + 0x40]);

			vgf2p8mulb(xmm1, xmm5, xmm2);
			vgf2p8mulb(ymm1, ymm5, ymm2);
			vgf2p8mulb(xmm1, xmm5, ptr [rax + 0x40]);
			vgf2p8mulb(ymm1, ymm5, ptr [rax + 0x40]);

			vgf2p8mulb(xmm30, xmm31, xmm4);
			vgf2p8mulb(ymm30, ymm31, ymm4);
			vgf2p8mulb(zmm30, zmm31, zmm4);

			vgf2p8mulb(xmm30|k1|T_z, xmm5, ptr [rax + 0x40]);
			vgf2p8mulb(ymm30|k1|T_z, ymm5, ptr [rax + 0x40]);
			vgf2p8mulb(zmm30|k1|T_z, zmm5, ptr [rax + 0x40]);
		}
	};
	Gen c;
	// Expected machine code, one row per instruction emitted above.
	const uint8_t tbl[] = {
		// gf2p8affineinvqb / vgf2p8affineinvqb
		0x66, 0x0f, 0x3a, 0xcf, 0xca, 0x03,
		0x66, 0x0f, 0x3a, 0xcf, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd1, 0xcf, 0xca, 0x03,
		0xc4, 0xe3, 0xd5, 0xcf, 0xca, 0x03,
		0xc4, 0xe3, 0xd1, 0xcf, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd5, 0xcf, 0x48, 0x40, 0x03,
		0x62, 0x63, 0x85, 0x00, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x20, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x40, 0xcf, 0xf4, 0x05,
		0x62, 0x63, 0xd5, 0x89, 0xcf, 0x70, 0x04, 0x05,
		0x62, 0x63, 0xd5, 0xa9, 0xcf, 0x70, 0x02, 0x05,
		0x62, 0x63, 0xd5, 0xc9, 0xcf, 0x70, 0x01, 0x05,
		0x62, 0x63, 0xd5, 0x99, 0xcf, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xb9, 0xcf, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xd9, 0xcf, 0x70, 0x08, 0x05,

		// gf2p8affineqb / vgf2p8affineqb
		0x66, 0x0f, 0x3a, 0xce, 0xca, 0x03,
		0x66, 0x0f, 0x3a, 0xce, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd1, 0xce, 0xca, 0x03,
		0xc4, 0xe3, 0xd5, 0xce, 0xca, 0x03,
		0xc4, 0xe3, 0xd1, 0xce, 0x48, 0x40, 0x03,
		0xc4, 0xe3, 0xd5, 0xce, 0x48, 0x40, 0x03,
		0x62, 0x63, 0x85, 0x00, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x20, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0x85, 0x40, 0xce, 0xf4, 0x05,
		0x62, 0x63, 0xd5, 0x89, 0xce, 0x70, 0x04, 0x05,
		0x62, 0x63, 0xd5, 0xa9, 0xce, 0x70, 0x02, 0x05,
		0x62, 0x63, 0xd5, 0xc9, 0xce, 0x70, 0x01, 0x05,
		0x62, 0x63, 0xd5, 0x99, 0xce, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xb9, 0xce, 0x70, 0x08, 0x05,
		0x62, 0x63, 0xd5, 0xd9, 0xce, 0x70, 0x08, 0x05,

		// gf2p8mulb / vgf2p8mulb
		0x66, 0x0f, 0x38, 0xcf, 0xca,
		0x66, 0x0f, 0x38, 0xcf, 0x48, 0x40,
		0xc4, 0xe2, 0x51, 0xcf, 0xca,
		0xc4, 0xe2, 0x55, 0xcf, 0xca,
		0xc4, 0xe2, 0x51, 0xcf, 0x48, 0x40,
		0xc4, 0xe2, 0x55, 0xcf, 0x48, 0x40,
		0x62, 0x62, 0x05, 0x00, 0xcf, 0xf4,
		0x62, 0x62, 0x05, 0x20, 0xcf, 0xf4,
		0x62, 0x62, 0x05, 0x40, 0xcf, 0xf4,
		0x62, 0x62, 0x55, 0x89, 0xcf, 0x70, 0x04,
		0x62, 0x62, 0x55, 0xa9, 0xcf, 0x70, 0x02,
		0x62, 0x62, 0x55, 0xc9, 0xcf, 0x70, 0x01,
	};
	const size_t n = sizeof(tbl) / sizeof(*tbl);
	// Compare the total length first, then the bytes themselves.
	CYBOZU_TEST_EQUAL(c.getSize(), n);
	CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n);
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue