[llvm] [PowerPC] Fix inefficient code for __builtin_ppc_test_data_class (PR #181420)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 12:59:37 PST 2026


https://github.com/diggerlin created https://github.com/llvm/llvm-project/pull/181420

None

>From 39e9ca0adf9f1568ca883c2558046c5f5f9c0d8e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 27 Jan 2026 20:07:42 +0000
Subject: [PATCH 1/2] first commit

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 214 +++++++++++++++++-
 .../CodeGen/PowerPC/ppc_test_data_class.ll    |   2 -
 2 files changed, 203 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3c2ad1b30b139..755dcd1fc9b51 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1436,8 +1436,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
   // We have target-specific dag combine patterns for the following nodes:
-  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
-                       ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+  setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
+                       ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
+                       ISD::BUILD_VECTOR});
   if (Subtarget.hasFPCVT())
     setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -11360,15 +11361,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
                                          : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
                                                              : PPC::XSTSTDCSP);
-    return SDValue(
-        DAG.getMachineNode(
-            PPC::SELECT_CC_I4, dl, MVT::i32,
-            {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
-                                        Op.getOperand(1)),
-                     0),
-             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
-             DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
-        0);
+    // Create the XSTSTDCQP/XSTSTDCDP/XSTSTDCSP node chosen by CmprOpc.
+    SDValue TestDataClass =
+        SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
+                                   {Op.getOperand(2), Op.getOperand(1)}),
+                0);
+    if (Subtarget.isISA3_1()) {
+      // Extract CR bit 2 (EQ bit) from CR field.
+      SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
+      SDValue CRBit =
+          SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+                                     TestDataClass, SubRegIdx),
+                  0);
+
+      // Use PPCsetbc to convert CR bit to integer
+      return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
+
+    } else {
+      return SDValue(DAG.getMachineNode(
+                         PPC::SELECT_CC_I4, dl, MVT::i32,
+                         {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
+                          DAG.getConstant(0, dl, MVT::i32),
+                          DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+                     0);
+    }
   }
   case Intrinsic::ppc_fnmsub: {
     EVT VT = Op.getOperand(1).getValueType();
@@ -17198,6 +17214,173 @@ static SDValue DAGCombineAddc(SDNode *N,
   return SDValue();
 }
 
+/// Fold a zero-extended equality test against zero when the tested value is
+/// known to be 0 or 1.
+///
+/// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
+///   -> xor(zext_or_trunc(Value), 1)  for seteq
+///   -> zext_or_trunc(Value)          for setne
+///
+/// The zero constant may be either operand of the setcc. Value is currently
+/// recognized only when it is a direct llvm.ppc.test.data.class call, whose
+/// i32 result is 0 or 1.
+///
+/// Example:
+///   Before: zext(setcc(test_data_class(...), 0, seteq))
+///           // test_data_class returns 0 or 1 in i32
+///           // setcc narrows the i32 result to i1
+///           // zext widens the i1 to the result type
+///   After:  xor(zext_or_trunc(test_data_class(...)), 1)
+///           // Value is resized once and never becomes an i1
+///
+/// This is beneficial because it:
+/// 1. eliminates the setcc node,
+/// 2. avoids narrowing the value to i1, and
+/// 3. keeps the computation in an ordinary integer type.
+///
+/// \param N   Candidate node; only ISD::ZERO_EXTEND is handled.
+/// \param DAG DAG in which the replacement nodes are created.
+/// \returns the replacement value, or an empty SDValue when N does not match.
+static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
+  // Match zext(setcc(...)).
+  if (N->getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(0);
+  if (SetCC.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Only equality and inequality tests can be folded.
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+  if (CC != ISD::SETEQ && CC != ISD::SETNE)
+    return SDValue();
+
+  // The zero may be on either side of the comparison; Value is the other
+  // operand.
+  SDValue LHS = SetCC.getOperand(0);
+  SDValue RHS = SetCC.getOperand(1);
+  SDValue Value;
+  if (isNullConstant(RHS))
+    Value = LHS;
+  else if (isNullConstant(LHS))
+    Value = RHS;
+  else
+    return SDValue();
+
+  // Value must be known to be 0 or 1. Only the test_data_class intrinsic is
+  // accepted; operand 0 of INTRINSIC_WO_CHAIN holds the intrinsic ID.
+  if (Value.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
+      Value.getConstantOperandVal(0) != Intrinsic::ppc_test_data_class)
+    return SDValue();
+
+  // Resize Value directly to the zext result type instead of going through
+  // the i1 setcc result.
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Resized = DAG.getZExtOrTrunc(Value, DL, VT);
+
+  // For a 0/1 value, (Value != 0) is the value itself.
+  if (CC == ISD::SETNE)
+    return Resized;
+
+  // For a 0/1 value, (Value == 0) is the value with bit 0 flipped.
+  SDValue One = DAG.getConstant(1, DL, VT);
+  return DAG.getNode(ISD::XOR, DL, VT, Resized, One);
+}
+
+/// Fold an XOR with 1 into a SELECT_CC_I4/I8 machine node that produces 0 or 1
+/// by exchanging the node's two constant arms, for example:
+/// 1. xor(SELECT_CC_I4(cond, 1, 0, pred), 1) -> SELECT_CC_I4(cond, 0, 1, pred)
+/// 2. xor(zext(SELECT_CC_I4(cond, 1, 0, pred)), 1)
+///      -> SELECT_CC_I4/I8(cond, 0, 1, pred)
+/// 3. xor(anyext(SELECT_CC_I4(cond, 1, 0, pred)), 1)
+///      -> SELECT_CC_I4/I8(cond, 0, 1, pred)
+///
+/// The same holds with the constant arms already in (0, 1) order and with the
+/// constant 1 as the first XOR operand. The new node's opcode follows the XOR
+/// result type (i32 -> SELECT_CC_I4, i64 -> SELECT_CC_I8).
+///
+/// The fold is only done when the XOR is the sole user of its non-constant
+/// operand and, when an extension is looked through, the extension is the
+/// sole user of the select.
+///
+/// \param N   The ISD::XOR node to combine.
+/// \param DAG DAG in which the replacement node is created.
+/// \returns the inverted select, or an empty SDValue when N does not match.
+static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
+
+  EVT XorVT = N->getValueType(0);
+  if (XorVT != MVT::i32 && XorVT != MVT::i64)
+    return SDValue();
+
+  // XOR is commutative, so accept the constant 1 on either side. Src is the
+  // other operand: the select, possibly behind an extension.
+  SDValue Src;
+  if (isOneConstant(N->getOperand(1)))
+    Src = N->getOperand(0);
+  else if (isOneConstant(N->getOperand(0)))
+    Src = N->getOperand(1);
+  else
+    return SDValue();
+
+  // Require this XOR to be the only user of Src.
+  if (!Src.hasOneUse())
+    return SDValue();
+
+  // Look through a zero or any extension whose input has no other user.
+  SDValue Select = Src;
+  if (Src.getOpcode() == ISD::ZERO_EXTEND ||
+      Src.getOpcode() == ISD::ANY_EXTEND) {
+    Select = Src.getOperand(0);
+    if (!Select.hasOneUse())
+      return SDValue();
+  }
+
+  // Only SELECT_CC_I4/I8 machine nodes are handled.
+  if (!Select.isMachineOpcode())
+    return SDValue();
+
+  unsigned SelectOpc = Select.getMachineOpcode();
+  if (SelectOpc != PPC::SELECT_CC_I4 && SelectOpc != PPC::SELECT_CC_I8)
+    return SDValue();
+
+  // Expected operands: (cond, true_val, false_val, predicate).
+  if (Select.getNumOperands() != 4)
+    return SDValue();
+
+  SDValue Cond = Select.getOperand(0);
+  SDValue Pred = Select.getOperand(3);
+  auto *TrueC = dyn_cast<ConstantSDNode>(Select.getOperand(1));
+  auto *FalseC = dyn_cast<ConstantSDNode>(Select.getOperand(2));
+  if (!TrueC || !FalseC)
+    return SDValue();
+
+  // Only the constant arm pairs (1, 0) and (0, 1) form a boolean select whose
+  // result can be inverted by swapping the arms.
+  bool IsOneZero = TrueC->isOne() && FalseC->isZero();
+  bool IsZeroOne = TrueC->isZero() && FalseC->isOne();
+  if (!IsOneZero && !IsZeroOne)
+    return SDValue();
+
+  // The replacement is created directly in the XOR result type, which also
+  // absorbs any extension that was looked through.
+  SDLoc DL(N);
+  unsigned NewOpc = XorVT == MVT::i32 ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
+  SDValue Zero = DAG.getConstant(0, DL, XorVT);
+  SDValue One = DAG.getConstant(1, DL, XorVT);
+
+  // Swap the arms, (1, 0) <-> (0, 1); condition and predicate stay the same.
+  SDValue NewTrue = IsOneZero ? Zero : One;
+  SDValue NewFalse = IsOneZero ? One : Zero;
+
+  SDValue NewSelect =
+      SDValue(DAG.getMachineNode(NewOpc, DL, XorVT,
+                                 {Cond, NewTrue, NewFalse, Pred}),
+              0);
+  return NewSelect;
+}
+
 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -17230,6 +17413,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
     return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
   }
+  case ISD::XOR: {
+    // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
+    if (SDValue V = combineXorSelectCC(N, DAG))
+      return V;
+    break;
+  }
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:
@@ -17258,6 +17447,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     break;
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
+    if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
+      return RetV;
+    [[fallthrough]];
   case ISD::ANY_EXTEND:
     return DAGCombineExtBoolTrunc(N, DCI);
   case ISD::TRUNCATE:
diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 55f175c648f00..3f870c022a5cb 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -10,7 +10,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
 ; PWR9-LABEL: _Z16ossIsValidDoubled:
 ; PWR9:       # %bb.0: # %entry
 ; PWR9-NEXT:    xststdcdp cr0, f1, 115
-; PWR9-NEXT:    li r3, 0
 ; PWR9-NEXT:    li r4, 1
 ; PWR9-NEXT:    iseleq r3, r4, r3
 ; PWR9-NEXT:    cntlzw r3, r3
@@ -36,7 +35,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
 ; BIT64-PWR9-NEXT:    cntlzw r3, r3
 ; BIT64-PWR9-NEXT:    srwi r3, r3, 5
 ; BIT64-PWR9-NEXT:    blr
-
 entry:
   %test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
   %tobool.not = icmp eq i32 %test_data_class, 0

>From e603d7636a8f3bcb4ae88e175e3c34280b64340e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 13 Feb 2026 21:06:29 +0000
Subject: [PATCH 2/2] changed test case

---
 .../CodeGen/PowerPC/ppc_test_data_class.ll    | 28 +++++--------------
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 3f870c022a5cb..5fc99422d1a58 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -1,40 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc   -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
 ; RUN: llc   -mtriple=powerpc-ibm-aixi-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc   -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc   -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
 ; RUN: llc   -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
 ; RUN: llc   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
 
 define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnamed_addr  {
 ; PWR9-LABEL: _Z16ossIsValidDoubled:
 ; PWR9:       # %bb.0: # %entry
 ; PWR9-NEXT:    xststdcdp cr0, f1, 115
-; PWR9-NEXT:    li r4, 1
-; PWR9-NEXT:    iseleq r3, r4, r3
-; PWR9-NEXT:    cntlzw r3, r3
-; PWR9-NEXT:    rlwinm r3, r3, 27, 31, 31
+; PWR9-NEXT:    li r3, 1
+; PWR9-NEXT:    iseleq r3, 0, r3
 ; PWR9-NEXT:    blr
 ;
 ; PWR10-LABEL: _Z16ossIsValidDoubled:
 ; PWR10:       # %bb.0: # %entry
 ; PWR10-NEXT:    xststdcdp cr0, f1, 115
-; PWR10-NEXT:    li r3, 0
-; PWR10-NEXT:    li r4, 1
-; PWR10-NEXT:    iseleq r3, r4, r3
-; PWR10-NEXT:    cntlzw r3, r3
-; PWR10-NEXT:    rlwinm r3, r3, 27, 31, 31
+; PWR10-NEXT:    setbc r3, eq
+; PWR10-NEXT:    xori r3, r3, 1
 ; PWR10-NEXT:    blr
-;
-; BIT64-PWR9-LABEL: _Z16ossIsValidDoubled:
-; BIT64-PWR9:       # %bb.0: # %entry
-; BIT64-PWR9-NEXT:    xststdcdp cr0, f1, 115
-; BIT64-PWR9-NEXT:    li r3, 0
-; BIT64-PWR9-NEXT:    li r4, 1
-; BIT64-PWR9-NEXT:    iseleq r3, r4, r3
-; BIT64-PWR9-NEXT:    cntlzw r3, r3
-; BIT64-PWR9-NEXT:    srwi r3, r3, 5
-; BIT64-PWR9-NEXT:    blr
+
 entry:
   %test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
   %tobool.not = icmp eq i32 %test_data_class, 0



More information about the llvm-commits mailing list