[llvm] [PowerPC] Use setbc for values from vector compare conditions (PR #114858)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 12 11:55:06 PST 2024


https://github.com/RolandF77 updated https://github.com/llvm/llvm-project/pull/114858

>From a06c1833642ca181408de5b57731e02636d8339a Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Mon, 4 Nov 2024 19:54:25 +0000
Subject: [PATCH 1/2] use setbc

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp  | 62 ++++++++++++++------
 llvm/lib/Target/PowerPC/PPCISelLowering.h    |  6 ++
 llvm/lib/Target/PowerPC/PPCInstrP10.td       | 12 +++-
 llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll | 15 +++++
 llvm/test/CodeGen/PowerPC/vcmp-setbc.ll      | 46 +++++++++++++++
 5 files changed, 122 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/vcmp-setbc.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d8f3095ed7fb68..a5cd136478c096 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1858,6 +1858,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
   case PPCISD::STORE_COND:
     return "PPCISD::STORE_COND";
+  case PPCISD::SETBC:
+    return "PPCISD::SETBC";
+  case PPCISD::SETBCR:
+    return "PPCISD::SETBCR";
   }
   return nullptr;
 }
@@ -11264,31 +11268,55 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
 
-  // Now that we have the comparison, emit a copy from the CR to a GPR.
-  // This is flagged to the above dot comparison.
-  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
-                                DAG.getRegister(PPC::CR6, MVT::i32),
-                                CompNode.getValue(1));
-
   // Unpack the result based on how the target uses it.
-  unsigned BitNo;   // Bit # of CR6.
-  bool InvertBit;   // Invert result?
+  unsigned BitNo; // Bit # of CR6.
+  bool InvertBit; // Invert result?
+  unsigned Bitx;
+  unsigned SetOp;
   switch (Op.getConstantOperandVal(1)) {
-  default:  // Can't happen, don't crash on invalid number though.
-  case 0:   // Return the value of the EQ bit of CR6.
-    BitNo = 0; InvertBit = false;
+  default: // Can't happen, don't crash on invalid number though.
+  case 0:  // Return the value of the EQ bit of CR6.
+    BitNo = 0;
+    InvertBit = false;
+    Bitx = PPC::sub_eq;
+    SetOp = PPCISD::SETBC;
     break;
-  case 1:   // Return the inverted value of the EQ bit of CR6.
-    BitNo = 0; InvertBit = true;
+  case 1: // Return the inverted value of the EQ bit of CR6.
+    BitNo = 0;
+    InvertBit = true;
+    Bitx = PPC::sub_eq;
+    SetOp = PPCISD::SETBCR;
     break;
-  case 2:   // Return the value of the LT bit of CR6.
-    BitNo = 2; InvertBit = false;
+  case 2: // Return the value of the LT bit of CR6.
+    BitNo = 2;
+    InvertBit = false;
+    Bitx = PPC::sub_lt;
+    SetOp = PPCISD::SETBC;
     break;
-  case 3:   // Return the inverted value of the LT bit of CR6.
-    BitNo = 2; InvertBit = true;
+  case 3: // Return the inverted value of the LT bit of CR6.
+    BitNo = 2;
+    InvertBit = true;
+    Bitx = PPC::sub_lt;
+    SetOp = PPCISD::SETBCR;
     break;
   }
 
+  if (Subtarget.isISA3_1()) {
+    SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
+    SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
+    SDValue CRBit =
+        SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+                                   CR6Reg, SubRegIdx, CompNode.getValue(1)),
+                0);
+    return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
+  }
+
+  // Now that we have the comparison, emit a copy from the CR to a GPR.
+  // This is flagged to the above dot comparison.
+  SDValue Flags =
+      DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
+                  DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1));
+
   // Shift the bit into the low position.
   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index dde45e4cf6f4ae..1c63444db427db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -501,6 +501,12 @@ namespace llvm {
     /// Constrained floating point add in round-to-zero mode.
     STRICT_FADDRTZ,
 
+    /// SETBC - The ISA 3.1 (P10) SETBC instruction.
+    SETBC,
+
+    /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
+    SETBCR,
+
     // NOTE: The nodes below may require PC-Rel specific patterns if the
     // address could be PC-Relative. When adding new nodes below, consider
     // whether or not the address can be PC-Relative and add the corresponding
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index c4b8597b1df9ff..1b7c54bb5ce185 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -79,6 +79,10 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
   SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
 ]>;
 
+def SDT_PPCsetbc : SDTypeProfile<1, 1, [
+  SDTCisInt<0>, SDTCisInt<1>
+]>;
+
 //===----------------------------------------------------------------------===//
 // ISA 3.1 specific PPCISD nodes.
 //
@@ -91,6 +95,8 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
 def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
                         []>;
 def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
+def PPCsetbc  : SDNode<"PPCISD::SETBC",   SDT_PPCsetbc, []>;
+def PPCsetbcr : SDNode<"PPCISD::SETBCR",  SDT_PPCsetbc, []>;
 
 //===----------------------------------------------------------------------===//
 
@@ -1397,10 +1403,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P
 
 let Predicates = [IsISA3_1] in {
   def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
-                            "setbc $RST, $BI", IIC_IntCompare, []>,
+                            "setbc $RST, $BI", IIC_IntCompare,
+                            [(set i32:$RST, (PPCsetbc i1:$BI))]>,
                             SExt32To64, ZExt32To64;
   def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
-                             "setbcr $RST, $BI", IIC_IntCompare, []>,
+                             "setbcr $RST, $BI", IIC_IntCompare,
+                             [(set i32:$RST, (PPCsetbcr i1:$BI))]>,
                              SExt32To64, ZExt32To64;
   def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
                              "setnbc $RST, $BI", IIC_IntCompare, []>,
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
new file mode 100644
index 00000000000000..4c8d34895f6b20
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define range(i64 -2147483648, 2147483648) i64 @cmpgt(<1 x i128> noundef %a, <1 x i128> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtuq. v2, v3, v2
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+  %0 = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %b, <1 x i128> %a)
+  %conv = sext i32 %0 to i64
+  ret i64 %conv
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>)
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
new file mode 100644
index 00000000000000..2c9088b61b034f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
+; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:     -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names \
+; RUN:     -ppc-vsr-nums-as-vr < %s | FileCheck %s
+
+define signext i32 @cmpgtw(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpgtsw. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+lt
+entry:
+  %0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
+  ret i32 %0
+}
+
+define signext i32 @cmpanynew(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequw. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+lt
+entry:
+  %0 = tail call i32 @llvm.ppc.altivec.vcmpequw.p(i32 3, <4 x i32> %a, <4 x i32> %b)
+  ret i32 %0
+}
+
+define signext i32 @cmpallneh(<8 x i16> noundef %a, <8 x i16> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequh. v2, v2, v3
+; CHECK: setbc r3, 4*cr6+eq
+entry:
+  %0 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 0, <8 x i16> %a, <8 x i16> %b)
+  ret i32 %0
+}
+
+define signext i32 @cmpeqb(<16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr {
+; CHECK: vcmpequb. v2, v2, v3
+; CHECK: setbcr r3, 4*cr6+eq
+entry:
+  %0 = tail call i32 @llvm.ppc.altivec.vcmpequb.p(i32 1, <16 x i8> %a, <16 x i8> %b)
+  ret i32 %0
+}
+
+declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
+
+declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>)
+
+declare i32 @llvm.ppc.altivec.vcmpequb.p(i32, <16 x i8>, <16 x i8>)

>From 1da11ad85ce1de06d36aff9b9df8b063dc906c00 Mon Sep 17 00:00:00 2001
From: Roland Froese <froese at ca.ibm.com>
Date: Tue, 12 Nov 2024 20:08:04 +0000
Subject: [PATCH 2/2] Address comments

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a5cd136478c096..d93d0fcdf42684 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11301,13 +11301,13 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     break;
   }
 
+  SDValue GlueOp = CompNode.getValue(1);
   if (Subtarget.isISA3_1()) {
     SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
     SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
     SDValue CRBit =
         SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
-                                   CR6Reg, SubRegIdx, CompNode.getValue(1)),
-                0);
+                                   CR6Reg, SubRegIdx, GlueOp), 0);
     return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
   }
 
@@ -11315,7 +11315,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   // This is flagged to the above dot comparison.
   SDValue Flags =
       DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
-                  DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1));
+                  DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
 
   // Shift the bit into the low position.
   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,



More information about the llvm-commits mailing list