[llvm] c933c2e - [PowerPC] Add BCD add/sub/cmp builtins
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 23 09:42:47 PST 2021
Author: Nemanja Ivanovic
Date: 2021-11-23T11:42:36-06:00
New Revision: c933c2eb334660c131f4afc9d194fafb0cec0423
URL: https://github.com/llvm/llvm-project/commit/c933c2eb334660c131f4afc9d194fafb0cec0423
DIFF: https://github.com/llvm/llvm-project/commit/c933c2eb334660c131f4afc9d194fafb0cec0423.diff
LOG: [PowerPC] Add BCD add/sub/cmp builtins
Add support for builtins that use bcdadd./bcdsub. to add and subtract
Binary Coded Decimal (BCD) values, as well as to check the validity of
BCD values and to compare them.
Differential revision: https://reviews.llvm.org/D114088
Added:
llvm/test/CodeGen/PowerPC/bcd-intrinsics.ll
Modified:
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/P10InstrResources.td
llvm/lib/Target/PowerPC/P9InstrResources.td
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
llvm/lib/Target/PowerPC/PPCInstrAltivec.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 8290342c0d51a..1d249310fc3f8 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -524,6 +524,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
+ // BCD intrinsics.
+ def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+ def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+
// P10 Vector Extract with Mask
def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index f43ba00ec3733..5906564810864 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -626,7 +626,9 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
// 5 Cycles Fixed-Point and BCD operations, 3 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
(instrs
+ BCDADD_rec,
BCDS_rec,
+ BCDSUB_rec,
BCDTRUNC_rec,
VADDECUQ,
VADDEUQM,
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index c4f4a2b3d7963..070a662179da7 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -624,7 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
BCDS_rec,
BCDTRUNC_rec,
BCDUS_rec,
- BCDUTRUNC_rec
+ BCDUTRUNC_rec,
+ BCDADD_rec,
+ BCDSUB_rec
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0abdf81d09083..247faa8b46a84 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -5049,16 +5049,94 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// value for the comparison. When selecting through a .td file, a type
// error is raised. Must check this first so we never break on the
// !Subtarget->isISA3_1() check.
- if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) {
+ auto IntID = N->getConstantOperandVal(0);
+ if (IntID == Intrinsic::ppc_fsels) {
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
return;
}
+ if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
+ auto Pred = N->getConstantOperandVal(1);
+ unsigned Opcode =
+ IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
+ unsigned SubReg = 0;
+ unsigned ShiftVal = 0;
+ bool Reverse = false;
+ switch (Pred) {
+ case 0:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ break;
+ case 1:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ Reverse = true;
+ break;
+ case 2:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ break;
+ case 3:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ Reverse = true;
+ break;
+ case 4:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ break;
+ case 5:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ Reverse = true;
+ break;
+ case 6:
+ SubReg = PPC::sub_un;
+ break;
+ case 7:
+ SubReg = PPC::sub_un;
+ Reverse = true;
+ break;
+ }
+
+ EVT VTs[] = {MVT::v16i8, MVT::Glue};
+ SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
+ CurDAG->getTargetConstant(0, dl, MVT::i32)};
+ SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ // On Power10, we can use SETBC[R]. On prior architectures, we have to use
+ // MFOCRF, rotate/mask the CR6 bit into the low bit and invert it if reversed.
+ if (Subtarget->isISA3_1()) {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, BCDOp.getValue(1)),
+ 0);
+ CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
+ CRBit);
+ } else {
+ SDValue Move =
+ SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
+ BCDOp.getValue(1)),
+ 0);
+ SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl)};
+ if (!Reverse)
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ else {
+ SDValue Shift = SDValue(
+ CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
+ }
+ }
+ return;
+ }
+
if (!Subtarget->isISA3_1())
break;
unsigned Opcode = 0;
- switch (N->getConstantOperandVal(0)) {
+ switch (IntID) {
default:
break;
case Intrinsic::ppc_altivec_vstribr_p:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 1e0e2d88e54be..fe21a164dfab4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1161,6 +1161,22 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
} // end HasAltivec
+// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
+class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo,
+ (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
+ !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+}
+
+// [PO VRT VRA VRB 1 / XO]
+class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+ let PS = 0;
+}
+
def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
@@ -1351,6 +1367,13 @@ def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
v2i64, v4i32>;
+def BCDADD_rec : VX_VT5_VA5_VB5_PS1_XO9_o<1, "bcdadd." , []>;
+def BCDSUB_rec : VX_VT5_VA5_VB5_PS1_XO9_o<65, "bcdsub." , []>;
+
+def : Pat<(v16i8 (int_ppc_bcdadd v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDADD_rec $vA, $vB, $PS)>;
+def : Pat<(v16i8 (int_ppc_bcdsub v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDSUB_rec $vA, $vB, $PS)>;
// Shuffle patterns for unary and swapped (LE) vector pack modulo.
def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
@@ -1598,22 +1621,6 @@ def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
-// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
-class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo,
- (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
- let Defs = [CR6];
-}
-
-// [PO VRT VRA VRB 1 / XO]
-class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
- let Defs = [CR6];
- let PS = 0;
-}
-
// Decimal Shift/Unsigned-Shift/Shift-and-Round
def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
diff --git a/llvm/test/CodeGen/PowerPC/bcd-intrinsics.ll b/llvm/test/CodeGen/PowerPC/bcd-intrinsics.ll
new file mode 100644
index 0000000000000..fa9f656c4b209
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bcd-intrinsics.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9
+
+define dso_local i64 @test_invalid(<16 x i8> %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_invalid:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v2, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_invalid:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v2, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 6, <16 x i8> %a, <16 x i8> %a) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local <16 x i8> @test_add(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_add:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdadd. v2, v2, v3, 1
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_add:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdadd. v2, v2, v3, 1
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.bcdadd(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %0
+}
+
+define dso_local i64 @test_add_ofl(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_add_ofl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdadd. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_add_ofl:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdadd. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdadd.p(i32 6, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local <16 x i8> @test_sub(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sub:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_sub:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call <16 x i8> @llvm.ppc.bcdsub(<16 x i8> %a, <16 x i8> %b, i32 0)
+ ret <16 x i8> %0
+}
+
+define dso_local i64 @test_sub_ofl(<16 x i8> %a, <16 x i8> %b, i64 %ps) local_unnamed_addr #0 {
+; CHECK-LABEL: test_sub_ofl:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+un
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_sub_ofl:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 28, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 6, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmplt(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmplt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+lt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmplt:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 25, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 2, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpgt(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpgt:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+gt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpgt:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 26, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 4, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpeq(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpeq:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbc r3, 4*cr6+eq
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpeq:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 27, 31, 31
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 0, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmpge(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmpge:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbcr r3, 4*cr6+lt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmpge:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 25, 31, 31
+; CHECK-P9-NEXT: xori r3, r3, 1
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 3, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+define dso_local i64 @test_cmple(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr #0 {
+; CHECK-LABEL: test_cmple:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-NEXT: setbcr r3, 4*cr6+gt
+; CHECK-NEXT: extsw r3, r3
+; CHECK-NEXT: blr
+;
+; CHECK-P9-LABEL: test_cmple:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: bcdsub. v2, v2, v3, 0
+; CHECK-P9-NEXT: mfocrf r3, 2
+; CHECK-P9-NEXT: rlwinm r3, r3, 26, 31, 31
+; CHECK-P9-NEXT: xori r3, r3, 1
+; CHECK-P9-NEXT: extsw r3, r3
+; CHECK-P9-NEXT: blr
+entry:
+ %0 = tail call i32 @llvm.ppc.bcdsub.p(i32 5, <16 x i8> %a, <16 x i8> %b) #2
+ %conv.i = sext i32 %0 to i64
+ ret i64 %conv.i
+}
+
+declare i32 @llvm.ppc.bcdsub.p(i32 immarg, <16 x i8>, <16 x i8>) #1
+declare i32 @llvm.ppc.bcdadd.p(i32 immarg, <16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.ppc.bcdadd(<16 x i8>, <16 x i8>, i32 immarg) #1
+declare <16 x i8> @llvm.ppc.bcdsub(<16 x i8>, <16 x i8>, i32 immarg) #1
More information about the llvm-commits
mailing list