[llvm] [PowerPC] Fix inefficient code for __builtin_ppc_test_data_class (PR #181420)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 12:59:37 PST 2026
https://github.com/diggerlin created https://github.com/llvm/llvm-project/pull/181420
None
>From 39e9ca0adf9f1568ca883c2558046c5f5f9c0d8e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 27 Jan 2026 20:07:42 +0000
Subject: [PATCH 1/2] first commit
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 214 +++++++++++++++++-
.../CodeGen/PowerPC/ppc_test_data_class.ll | 2 -
2 files changed, 203 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3c2ad1b30b139..755dcd1fc9b51 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1436,8 +1436,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
- ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+ setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
+ ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
+ ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -11360,15 +11361,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP);
- return SDValue(
- DAG.getMachineNode(
- PPC::SELECT_CC_I4, dl, MVT::i32,
- {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
- Op.getOperand(1)),
- 0),
- DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
- DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
- 0);
+ // Create the XSTSTDCQP/XSTSTDCDP/XSTSTDCSP node chosen by CmprOpc.
+ SDValue TestDataClass =
+ SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
+ {Op.getOperand(2), Op.getOperand(1)}),
+ 0);
+ if (Subtarget.isISA3_1()) {
+ // Extract CR bit 2 (EQ bit) from CR field.
+ SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
+ SDValue CRBit =
+ SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ TestDataClass, SubRegIdx),
+ 0);
+
+ // Use PPCsetbc to convert CR bit to integer
+ return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
+
+ } else {
+ return SDValue(DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
+ }
}
case Intrinsic::ppc_fnmsub: {
EVT VT = Op.getOperand(1).getValueType();
@@ -17198,6 +17214,173 @@ static SDValue DAGCombineAddc(SDNode *N,
return SDValue();
}
+/// Fold a zero-extended equality test against zero when the tested value is
+/// already known to be 0 or 1.
+///
+/// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
+///   -> xor(zext_or_trunc(Value), 1)  for seteq
+///   -> zext_or_trunc(Value)          for setne
+///
+/// The zero may be either operand of the setcc. Only the result of the
+/// ppc_test_data_class intrinsic is currently recognised as a 0/1 value.
+///
+/// Example:
+///   Before: zext(setcc(test_data_class(...), 0, seteq))
+///             // test_data_class returns 0 or 1 in i32
+///             // setcc converts i32 -> i1
+///             // zext converts i1 -> i64
+///   After:  xor(zext(test_data_class(...)), 1)
+///             // extended once to i64, then bit 0 is flipped
+///
+/// This is beneficial because it:
+///   1. Eliminates the setcc.
+///   2. Avoids the i32 -> i1 truncation.
+///   3. Keeps the computation in a native integer width.
+///
+/// \returns the replacement value, or an empty SDValue if \p N does not match.
+static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
+  // PerformDAGCombine also routes SIGN_EXTEND nodes through this helper; only
+  // a zero extension may be folded.
+  if (N->getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  if (Src.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Only equality tests can be rewritten as "Value" or "Value ^ 1".
+  ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
+  if (CC != ISD::SETEQ && CC != ISD::SETNE)
+    return SDValue();
+
+  // One side of the comparison must be the constant zero; the other side is
+  // the value being tested. seteq/setne are symmetric, so the condition code
+  // does not need to be swapped when the zero is on the left.
+  SDValue LHS = Src.getOperand(0);
+  SDValue RHS = Src.getOperand(1);
+  if (!isNullConstant(RHS) && !isNullConstant(LHS))
+    return SDValue();
+  SDValue Value = isNullConstant(RHS) ? LHS : RHS;
+
+  // The rewrite is only valid when the tested value is known to be 0 or 1.
+  auto IsZeroOrOne = [](const SDValue &V) {
+    return V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+           V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class;
+  };
+  if (!IsZeroOrOne(Value))
+    return SDValue();
+
+  // Bring the value to the result type of the zero_extend. A truncation is
+  // lossless here because the value is 0 or 1.
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue Ext = DAG.getZExtOrTrunc(Value, DL, VT);
+
+  // (Value != 0) is Value itself; (Value == 0) is Value with bit 0 flipped.
+  if (CC == ISD::SETNE)
+    return Ext;
+  return DAG.getNode(ISD::XOR, DL, VT, Ext, DAG.getConstant(1, DL, VT));
+}
+
+/// Fold "xor X, 1" into the SELECT_CC_I4/I8 pseudo that produces X by
+/// exchanging the pseudo's 0/1 arms, for example:
+///   1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
+///   2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1)
+///        -> SELECT_CC_I4/I8(cond, 0, 1, cc)
+///   3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1)
+///        -> SELECT_CC_I4/I8(cond, 0, 1, cc)
+/// The mirrored form, whose arms are (0, 1), is folded to (1, 0) the same way.
+/// The new pseudo is I4 or I8 according to the XOR's result type.
+///
+/// \returns the new select, or an empty SDValue if \p N does not match.
+static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
+
+  // A replacement pseudo is only chosen for i32 and i64 results.
+  EVT XorVT = N->getValueType(0);
+  if (XorVT != MVT::i32 && XorVT != MVT::i64)
+    return SDValue();
+
+  // XOR is commutative: accept the constant 1 on either side and treat the
+  // remaining operand as the value being inverted.
+  SDValue Inverted;
+  if (isOneConstant(N->getOperand(1)))
+    Inverted = N->getOperand(0);
+  else if (isOneConstant(N->getOperand(0)))
+    Inverted = N->getOperand(1);
+  else
+    return SDValue();
+
+  // Require the XOR to be the only user, so the original value does not have
+  // to be kept alive next to the new select.
+  if (!Inverted.hasOneUse())
+    return SDValue();
+
+  // Look through a single-use zero/any extension of the select.
+  SDValue SelectNode = Inverted;
+  if (Inverted.getOpcode() == ISD::ZERO_EXTEND ||
+      Inverted.getOpcode() == ISD::ANY_EXTEND) {
+    SelectNode = Inverted.getOperand(0);
+    if (!SelectNode.hasOneUse())
+      return SDValue();
+  }
+
+  // Only SELECT_CC_I4/I8 machine nodes are handled.
+  if (!SelectNode.isMachineOpcode())
+    return SDValue();
+  unsigned SelectOpc = SelectNode.getMachineOpcode();
+  if (SelectOpc != PPC::SELECT_CC_I4 && SelectOpc != PPC::SELECT_CC_I8)
+    return SDValue();
+
+  // Expected operands: (cond, true_val, false_val, bropc).
+  if (SelectNode.getNumOperands() != 4)
+    return SDValue();
+  SDValue Cond = SelectNode.getOperand(0);
+  SDValue TrueVal = SelectNode.getOperand(1);
+  SDValue FalseVal = SelectNode.getOperand(2);
+  SDValue BrOpcode = SelectNode.getOperand(3);
+
+  // The arms must be exactly the constants (1, 0) or (0, 1).
+  bool IsOneZero = isOneConstant(TrueVal) && isNullConstant(FalseVal);
+  bool IsZeroOne = isNullConstant(TrueVal) && isOneConstant(FalseVal);
+  if (!IsOneZero && !IsZeroOne)
+    return SDValue();
+
+  // Rebuild the select in the XOR's type with the two arms exchanged; the
+  // condition and branch opcode are reused unchanged.
+  SDLoc DL(N);
+  unsigned NewOpc = XorVT == MVT::i32 ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
+  SDValue Zero = DAG.getConstant(0, DL, XorVT);
+  SDValue One = DAG.getConstant(1, DL, XorVT);
+  SDValue NewTrue = IsOneZero ? Zero : One;
+  SDValue NewFalse = IsOneZero ? One : Zero;
+  MachineSDNode *NewSelect = DAG.getMachineNode(
+      NewOpc, DL, XorVT, {Cond, NewTrue, NewFalse, BrOpcode});
+  return SDValue(NewSelect, 0);
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17230,6 +17413,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
}
+ case ISD::XOR: {
+ // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
+ if (SDValue V = combineXorSelectCC(N, DAG))
+ return V;
+ break;
+ }
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -17258,6 +17447,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
+ return RetV;
+ [[fallthrough]];
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 55f175c648f00..3f870c022a5cb 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -10,7 +10,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
; PWR9-LABEL: _Z16ossIsValidDoubled:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: xststdcdp cr0, f1, 115
-; PWR9-NEXT: li r3, 0
; PWR9-NEXT: li r4, 1
; PWR9-NEXT: iseleq r3, r4, r3
; PWR9-NEXT: cntlzw r3, r3
@@ -36,7 +35,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
; BIT64-PWR9-NEXT: cntlzw r3, r3
; BIT64-PWR9-NEXT: srwi r3, r3, 5
; BIT64-PWR9-NEXT: blr
-
entry:
%test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
%tobool.not = icmp eq i32 %test_data_class, 0
>From e603d7636a8f3bcb4ae88e175e3c34280b64340e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 13 Feb 2026 21:06:29 +0000
Subject: [PATCH 2/2] changed test case
---
.../CodeGen/PowerPC/ppc_test_data_class.ll | 28 +++++--------------
1 file changed, 7 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 3f870c022a5cb..5fc99422d1a58 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -1,40 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc-ibm-aixi-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnamed_addr {
; PWR9-LABEL: _Z16ossIsValidDoubled:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: xststdcdp cr0, f1, 115
-; PWR9-NEXT: li r4, 1
-; PWR9-NEXT: iseleq r3, r4, r3
-; PWR9-NEXT: cntlzw r3, r3
-; PWR9-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR9-NEXT: li r3, 1
+; PWR9-NEXT: iseleq r3, 0, r3
; PWR9-NEXT: blr
;
; PWR10-LABEL: _Z16ossIsValidDoubled:
; PWR10: # %bb.0: # %entry
; PWR10-NEXT: xststdcdp cr0, f1, 115
-; PWR10-NEXT: li r3, 0
-; PWR10-NEXT: li r4, 1
-; PWR10-NEXT: iseleq r3, r4, r3
-; PWR10-NEXT: cntlzw r3, r3
-; PWR10-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR10-NEXT: setbc r3, eq
+; PWR10-NEXT: xori r3, r3, 1
; PWR10-NEXT: blr
-;
-; BIT64-PWR9-LABEL: _Z16ossIsValidDoubled:
-; BIT64-PWR9: # %bb.0: # %entry
-; BIT64-PWR9-NEXT: xststdcdp cr0, f1, 115
-; BIT64-PWR9-NEXT: li r3, 0
-; BIT64-PWR9-NEXT: li r4, 1
-; BIT64-PWR9-NEXT: iseleq r3, r4, r3
-; BIT64-PWR9-NEXT: cntlzw r3, r3
-; BIT64-PWR9-NEXT: srwi r3, r3, 5
-; BIT64-PWR9-NEXT: blr
+
entry:
%test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
%tobool.not = icmp eq i32 %test_data_class, 0
More information about the llvm-commits
mailing list