Implement UHSUB8 and UHSUB16 (#48)

This commit is contained in:
Merry 2016-11-26 18:27:21 +00:00 committed by GitHub
parent cb17f9a3ed
commit 0ff8c375af
5 changed files with 86 additions and 2 deletions

View file

@ -1397,6 +1397,66 @@ void EmitX64::EmitPackedHalvingAddS16(IR::Block& block, IR::Inst* inst) {
code->xor(result, carry);
}
void EmitX64::EmitPackedHalvingSubU8(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
code->xor(minuend, subtrahend);
code->and(subtrahend, minuend);
code->shr(minuend, 1);
// At this point,
// minuend := (a^b) >> 1
// subtrahend := (a^b) & b
// We must now perform a partitioned subtraction.
// We can do this because minuend contains 7 bit fields.
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
// We invert this bit at the end as this tells us if that bit was borrowed from.
code->or(minuend, 0x80808080);
code->sub(minuend, subtrahend);
code->xor(minuend, 0x80808080);
// minuend now contains the desired result.
}
void EmitX64::EmitPackedHalvingSubU16(IR::Block& block, IR::Inst* inst) {
IR::Value a = inst->GetArg(0);
IR::Value b = inst->GetArg(1);
Xbyak::Reg32 minuend = reg_alloc.UseDefGpr(a, inst).cvt32();
Xbyak::Reg32 subtrahend = reg_alloc.UseScratchGpr(b).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
// Since we want to calculate (x+y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
code->xor(minuend, subtrahend);
code->and(subtrahend, minuend);
code->shr(minuend, 1);
// At this point,
// minuend := (a^b) >> 1
// subtrahend := (a^b) & b
// We must now perform a partitioned subtraction.
// We can do this because minuend contains 15 bit fields.
// We use the extra bit in minuend as a bit to borrow from; we set this bit.
// We invert this bit at the end as this tells us if that bit was borrowed from.
code->or(minuend, 0x80008000);
code->sub(minuend, subtrahend);
code->xor(minuend, 0x80008000);
// minuend now contains the desired result.
}
void EmitX64::EmitPackedSaturatedAddU8(IR::Block& block, IR::Inst* inst) {
EmitPackedOperation(code, reg_alloc, inst, &Xbyak::CodeGenerator::paddusb);
}