[llvm] [PowerPC] Use setbc for values from vector compare conditions (PR #114858)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 11:53:31 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: None (RolandF77)
<details>
<summary>Changes</summary>
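On Power10 (ISA 3.1), use the setbc/setbcr instructions to materialize integer values directly from the CR6 bits set by vector compare (record-form) conditions, instead of copying CR6 to a GPR with mfocrf and then shifting the bit into place.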
---
Full diff: https://github.com/llvm/llvm-project/pull/114858.diff
5 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+45-17)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+6)
- (modified) llvm/lib/Target/PowerPC/PPCInstrP10.td (+10-2)
- (added) llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll (+15)
- (added) llvm/test/CodeGen/PowerPC/vcmp-setbc.ll (+46)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d8f3095ed7fb68..a5cd136478c096 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1858,6 +1858,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
case PPCISD::STORE_COND:
return "PPCISD::STORE_COND";
+ case PPCISD::SETBC:
+ return "PPCISD::SETBC";
+ case PPCISD::SETBCR:
+ return "PPCISD::SETBCR";
}
return nullptr;
}
@@ -11264,31 +11268,55 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
- // Now that we have the comparison, emit a copy from the CR to a GPR.
- // This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
- DAG.getRegister(PPC::CR6, MVT::i32),
- CompNode.getValue(1));
-
// Unpack the result based on how the target uses it.
- unsigned BitNo; // Bit # of CR6.
- bool InvertBit; // Invert result?
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ unsigned Bitx;
+ unsigned SetOp;
switch (Op.getConstantOperandVal(1)) {
- default: // Can't happen, don't crash on invalid number though.
- case 0: // Return the value of the EQ bit of CR6.
- BitNo = 0; InvertBit = false;
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0;
+ InvertBit = false;
+ Bitx = PPC::sub_eq;
+ SetOp = PPCISD::SETBC;
break;
- case 1: // Return the inverted value of the EQ bit of CR6.
- BitNo = 0; InvertBit = true;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0;
+ InvertBit = true;
+ Bitx = PPC::sub_eq;
+ SetOp = PPCISD::SETBCR;
break;
- case 2: // Return the value of the LT bit of CR6.
- BitNo = 2; InvertBit = false;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2;
+ InvertBit = false;
+ Bitx = PPC::sub_lt;
+ SetOp = PPCISD::SETBC;
break;
- case 3: // Return the inverted value of the LT bit of CR6.
- BitNo = 2; InvertBit = true;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2;
+ InvertBit = true;
+ Bitx = PPC::sub_lt;
+ SetOp = PPCISD::SETBCR;
break;
}
+ if (Subtarget.isISA3_1()) {
+ SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
+ SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
+ SDValue CRBit =
+ SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, CompNode.getValue(1)),
+ 0);
+ return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
+ }
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags =
+ DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1));
+
// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index dde45e4cf6f4ae..1c63444db427db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -501,6 +501,12 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
+ /// SETBC - The ISA 3.1 (P10) SETBC instruction.
+ SETBC,
+
+ /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
+ SETBCR,
+
// NOTE: The nodes below may require PC-Rel specific patterns if the
// address could be PC-Relative. When adding new nodes below, consider
// whether or not the address can be PC-Relative and add the corresponding
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index c4b8597b1df9ff..1b7c54bb5ce185 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -79,6 +79,10 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
+def SDT_PPCsetbc : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisInt<1>
+]>;
+
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
//
@@ -91,6 +95,8 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
+def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>;
+def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>;
//===----------------------------------------------------------------------===//
@@ -1397,10 +1403,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P
let Predicates = [IsISA3_1] in {
def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
- "setbc $RST, $BI", IIC_IntCompare, []>,
+ "setbc $RST, $BI", IIC_IntCompare,
+ [(set i32:$RST, (PPCsetbc i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
- "setbcr $RST, $BI", IIC_IntCompare, []>,
+ "setbcr $RST, $BI", IIC_IntCompare,
+ [(set i32:$RST, (PPCsetbcr i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
"setnbc $RST, $BI", IIC_IntCompare, []>,
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
new file mode 100644
index 00000000000000..4c8d34895f6b20
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define range(i64 -2147483648, 2147483648) i64 @cmpgt(<1 x i128> noundef %a, <1 x i128> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtuq. v2, v3, v2
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %b, <1 x i128> %a)
+ %conv = sext i32 %0 to i64
+ ret i64 %conv
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>)
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
new file mode 100644
index 00000000000000..2c9088b61b034f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define signext i32 @cmpgtw(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtsw. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpanynew(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequw. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+lt
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequw.p(i32 3, <4 x i32> %a, <4 x i32> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpallneh(<8 x i16> noundef %a, <8 x i16> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequh. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+eq
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 0, <8 x i16> %a, <8 x i16> %b)
+ ret i32 %0
+}
+
+define signext i32 @cmpeqb(<16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequb. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+eq
+entry:
+ %0 = tail call i32 @llvm.ppc.altivec.vcmpequb.p(i32 1, <16 x i8> %a, <16 x i8> %b)
+ ret i32 %0
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>)
+
+declare i32 @llvm.ppc.altivec.vcmpequb.p(i32, <16 x i8>, <16 x i8>)
``````````
</details>
https://github.com/llvm/llvm-project/pull/114858
More information about the llvm-commits
mailing list