[llvm] [PowerPC] Fix inefficient code for __builtin_ppc_test_data_class (PR #181420)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 19 11:31:34 PST 2026
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/181420
>From 39e9ca0adf9f1568ca883c2558046c5f5f9c0d8e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 27 Jan 2026 20:07:42 +0000
Subject: [PATCH 1/9] first commit
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 214 +++++++++++++++++-
.../CodeGen/PowerPC/ppc_test_data_class.ll | 2 -
2 files changed, 203 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3c2ad1b30b139..755dcd1fc9b51 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1436,8 +1436,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
- ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+ setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::XOR, ISD::SHL, ISD::SRA,
+ ISD::SRL, ISD::MUL, ISD::FMA, ISD::SINT_TO_FP,
+ ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -11360,15 +11361,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP);
- return SDValue(
- DAG.getMachineNode(
- PPC::SELECT_CC_I4, dl, MVT::i32,
- {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
- Op.getOperand(1)),
- 0),
- DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
- DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
- 0);
+ // Create XSTSTDCDP/XSTSTDCSP node.
+ SDValue TestDataClass =
+ SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
+ {Op.getOperand(2), Op.getOperand(1)}),
+ 0);
+ if (Subtarget.isISA3_1()) {
+ // Extract CR bit 2 (EQ bit) from CR field.
+ SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
+ SDValue CRBit =
+ SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ TestDataClass, SubRegIdx),
+ 0);
+
+ // Use PPCsetbc to convert CR bit to integer
+ return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
+
+ } else {
+ return SDValue(DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
+ }
}
case Intrinsic::ppc_fnmsub: {
EVT VT = Op.getOperand(1).getValueType();
@@ -17198,6 +17214,173 @@ static SDValue DAGCombineAddc(SDNode *N,
return SDValue();
}
+/// Optimize zero-extension of setcc when the compared value is known to be 0
+/// or 1.
+///
+/// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
+/// -> zext(xor(Value, 1)) for seteq
+/// -> zext(Value) for setne
+///
+/// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
+/// by keeping the value in its original i32 type throughout.
+///
+/// Example:
+/// Before: zext(setcc(test_data_class(...), 0, seteq))
+/// // test_data_class returns 0 or 1 in i32
+/// // setcc converts i32 -> i1
+/// // zext converts i1 -> i64
+/// After: zext(xor(test_data_class(...), 1))
+/// // Stays in i32, then extends to i64
+///
+/// This is beneficial because:
+/// 1. Eliminates the setcc instruction
+/// 2. Avoids i32 -> i1 truncation
+/// 3. Keeps computation in native integer width
+
+static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
+ // Check if this is a zero_extend
+ if (N->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+
+ // Check if the source is a setcc
+ if (Src.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ SDValue LHS = Src.getOperand(0);
+ SDValue RHS = Src.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
+
+ if(!isNullConstant(RHS) && !isNullConstant(LHS))
+ return SDValue();
+
+ SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS ;
+
+ auto isZeroOrOne = [=](SDValue &V) {
+ if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ V.getConstantOperandVal(0) == Intrinsic::ppc_test_data_class)
+ return true;
+ return false;
+ };
+
+ if (!isZeroOrOne(NonNullConstant))
+ return SDValue();
+
+ // Check for pattern: zext(setcc(Value, 0, seteq)) or
+ // zext(setcc(Value, 0, setne))
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ // Replace with: zext(xor(Value, 1)) for seteq
+ // or: zext(Value) for setne
+ // This keeps the value in i32 instead of converting to i1
+ SDLoc DL(N);
+ EVT VType = N->getValueType(0);
+ SDValue NewNonNullConstant =
+ DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
+
+ if (CC == ISD::SETNE)
+ return NewNonNullConstant;
+
+ SDValue One = DAG.getConstant(1, DL, VType);
+ return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
+ }
+
+ return SDValue();
+}
+
+// Combine XOR patterns with SELECT_CC_I4/I8, for example:
+// 1. XOR(SELECT_CC_I4(cond, 1, 0, cc), 1) -> SELECT_CC_I4(cond, 0, 1, cc)
+// 2. XOR(ZEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond, 0,
+// 1, cc))
+// 3. XOR(ANYEXT(SELECT_CC_I4(cond, 1, 0, cc)), 1) -> SELECT_CC_I4/I8(cond,
+// 0, 1, cc))
+// 4. etc
+static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::XOR && "Expected XOR node");
+
+ EVT XorVT = N->getValueType(0);
+ if ((XorVT != MVT::i32 && XorVT != MVT::i64))
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Check for XOR with constant 1
+ ConstantSDNode *XorConst = dyn_cast<ConstantSDNode>(RHS);
+ if (!XorConst || !XorConst->isOne()) {
+ XorConst = dyn_cast<ConstantSDNode>(LHS);
+ if (!XorConst || !XorConst->isOne())
+ return SDValue();
+ // Swap so LHS is the SELECT_CC_I4 (or extension) and RHS is the constant
+ std::swap(LHS, RHS);
+ }
+
+ // Check if LHS has only one use
+ if (!LHS.hasOneUse())
+ return SDValue();
+
+ // Handle extensions: ZEXT, ANYEXT
+ SDValue SelectNode = LHS;
+
+ if (LHS.getOpcode() == ISD::ZERO_EXTEND ||
+ LHS.getOpcode() == ISD::ANY_EXTEND) {
+ SelectNode = LHS.getOperand(0);
+
+ // Check if the extension input has only one use
+ if (!SelectNode.hasOneUse())
+ return SDValue();
+ }
+
+ // Check if SelectNode is a MachineSDNode with SELECT_CC_I4/I8 opcode
+ if (!SelectNode.isMachineOpcode())
+ return SDValue();
+
+ unsigned MachineOpc = SelectNode.getMachineOpcode();
+
+ // Handle both SELECT_CC_I4 and SELECT_CC_I8
+ if (MachineOpc != PPC::SELECT_CC_I4 && MachineOpc != PPC::SELECT_CC_I8)
+ return SDValue();
+
+ // SELECT_CC_I4 operands: (cond, true_val, false_val, bropc)
+ if (SelectNode.getNumOperands() != 4)
+ return SDValue();
+
+ SDValue Cond = SelectNode.getOperand(0);
+ SDValue TrueVal = SelectNode.getOperand(1);
+ SDValue FalseVal = SelectNode.getOperand(2);
+ SDValue BrOpcode = SelectNode.getOperand(3);
+
+ // Check if true_val is constant 1 and false_val is constant 0
+ ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueVal);
+ ConstantSDNode *FalseConst = dyn_cast<ConstantSDNode>(FalseVal);
+
+ if (!TrueConst || !FalseConst)
+ return SDValue();
+
+ if (!(TrueConst->isOne() && FalseConst->isZero() ||
+ TrueConst->isZero() && FalseConst->isOne()))
+ return SDValue();
+
+ // Pattern matched! Create new SELECT_CC with swapped operands
+ SDLoc DL(N);
+ MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
+ SDValue ZeroValue = DAG.getConstant(0, DL, XorVT);
+ SDValue OneValue = DAG.getConstant(1, DL, XorVT);
+
+ // Create new MachineSDNode: SELECT_CC_I4/8(cond, 0, 1, bropc)
+ SDValue NewSelect = SDValue(
+ DAG.getMachineNode(
+ MachineOpc, DL, XorVT,
+ {Cond, // Same condition
+ TrueConst->isOne() ? ZeroValue : OneValue, // 0 (was false, now true)
+ FalseConst->isZero() ? OneValue
+ : ZeroValue, // 1 (was true, now false)
+ BrOpcode}), // Same branch opcode
+ 0);
+
+ return NewSelect;
+}
+
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17230,6 +17413,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
}
+ case ISD::XOR: {
+ // Optimize XOR(ISEL(1,0,CR), 1) -> ISEL(0,1,CR)
+ if (SDValue V = combineXorSelectCC(N, DAG))
+ return V;
+ break;
+ }
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -17258,6 +17447,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
+ return RetV;
+ [[fallthrough]];
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 55f175c648f00..3f870c022a5cb 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -10,7 +10,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
; PWR9-LABEL: _Z16ossIsValidDoubled:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: xststdcdp cr0, f1, 115
-; PWR9-NEXT: li r3, 0
; PWR9-NEXT: li r4, 1
; PWR9-NEXT: iseleq r3, r4, r3
; PWR9-NEXT: cntlzw r3, r3
@@ -36,7 +35,6 @@ define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnam
; BIT64-PWR9-NEXT: cntlzw r3, r3
; BIT64-PWR9-NEXT: srwi r3, r3, 5
; BIT64-PWR9-NEXT: blr
-
entry:
%test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
%tobool.not = icmp eq i32 %test_data_class, 0
>From e603d7636a8f3bcb4ae88e175e3c34280b64340e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 13 Feb 2026 21:06:29 +0000
Subject: [PATCH 2/9] changed test case
---
.../CodeGen/PowerPC/ppc_test_data_class.ll | 28 +++++--------------
1 file changed, 7 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
index 3f870c022a5cb..5fc99422d1a58 100644
--- a/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc_test_data_class.ll
@@ -1,40 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc-ibm-aixi-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
-; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=BIT64-PWR9 %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR9 %s
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=PWR10 %s
define noundef zeroext i1 @_Z16ossIsValidDoubled(double noundef %in) local_unnamed_addr {
; PWR9-LABEL: _Z16ossIsValidDoubled:
; PWR9: # %bb.0: # %entry
; PWR9-NEXT: xststdcdp cr0, f1, 115
-; PWR9-NEXT: li r4, 1
-; PWR9-NEXT: iseleq r3, r4, r3
-; PWR9-NEXT: cntlzw r3, r3
-; PWR9-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR9-NEXT: li r3, 1
+; PWR9-NEXT: iseleq r3, 0, r3
; PWR9-NEXT: blr
;
; PWR10-LABEL: _Z16ossIsValidDoubled:
; PWR10: # %bb.0: # %entry
; PWR10-NEXT: xststdcdp cr0, f1, 115
-; PWR10-NEXT: li r3, 0
-; PWR10-NEXT: li r4, 1
-; PWR10-NEXT: iseleq r3, r4, r3
-; PWR10-NEXT: cntlzw r3, r3
-; PWR10-NEXT: rlwinm r3, r3, 27, 31, 31
+; PWR10-NEXT: setbc r3, eq
+; PWR10-NEXT: xori r3, r3, 1
; PWR10-NEXT: blr
-;
-; BIT64-PWR9-LABEL: _Z16ossIsValidDoubled:
-; BIT64-PWR9: # %bb.0: # %entry
-; BIT64-PWR9-NEXT: xststdcdp cr0, f1, 115
-; BIT64-PWR9-NEXT: li r3, 0
-; BIT64-PWR9-NEXT: li r4, 1
-; BIT64-PWR9-NEXT: iseleq r3, r4, r3
-; BIT64-PWR9-NEXT: cntlzw r3, r3
-; BIT64-PWR9-NEXT: srwi r3, r3, 5
-; BIT64-PWR9-NEXT: blr
+
entry:
%test_data_class = tail call i32 @llvm.ppc.test.data.class.f64(double %in, i32 115)
%tobool.not = icmp eq i32 %test_data_class, 0
>From eb8170e206737c500868bf6b004fc4ccb73c955e Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 13 Feb 2026 21:34:20 +0000
Subject: [PATCH 3/9] fix a warning
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 755dcd1fc9b51..f23bc71cb6667 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17357,8 +17357,8 @@ static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
if (!TrueConst || !FalseConst)
return SDValue();
- if (!(TrueConst->isOne() && FalseConst->isZero() ||
- TrueConst->isZero() && FalseConst->isOne()))
+ if (!((TrueConst->isOne() && FalseConst->isZero()) ||
+ (TrueConst->isZero() && FalseConst->isOne())))
return SDValue();
// Pattern matched! Create new SELECT_CC with swapped operands
>From 4ab2631d408562eba8ad1e9daaff9a1cc172611d Mon Sep 17 00:00:00 2001
From: Zhijian Lin <zhijian at ca.ibm.com>
Date: Wed, 18 Feb 2026 13:40:14 -0500
Subject: [PATCH 4/9] address comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 71 ++++++++++-----------
1 file changed, 35 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f23bc71cb6667..6a30dcad7e095 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11361,30 +11361,37 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP);
- // Create XSTSTDCDP/XSTSTDCSP node.
+ // Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
+ // The XSTSTDC* instructions test if a floating-point value matches any of the
+ // data classes specified in the mask, setting CR field bits accordingly.
+ // We need to extract the EQ bit (bit 2) from the CR field and convert it to
+ // an integer result (1 if match, 0 if no match).
+ //
+ // Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
+ // intrinsic provides (value, mask) as Op.getOperand(1) and Op.getOperand(2).
SDValue TestDataClass =
SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
{Op.getOperand(2), Op.getOperand(1)}),
0);
if (Subtarget.isISA3_1()) {
- // Extract CR bit 2 (EQ bit) from CR field.
+ // ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
+ // This is more efficient than the SELECT_CC approach used in earlier ISAs.
SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
SDValue CRBit =
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
TestDataClass, SubRegIdx),
0);
- // Use PPCsetbc to convert CR bit to integer
return DAG.getNode(PPCISD::SETBC, dl, MVT::i32, CRBit);
-
- } else {
- return SDValue(DAG.getMachineNode(
- PPC::SELECT_CC_I4, dl, MVT::i32,
- {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32),
- DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
- 0);
}
+
+ // Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
+ return SDValue(DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
}
case Intrinsic::ppc_fnmsub: {
EVT VT = Op.getOperand(1).getValueType();
@@ -17345,40 +17352,32 @@ static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
if (SelectNode.getNumOperands() != 4)
return SDValue();
- SDValue Cond = SelectNode.getOperand(0);
- SDValue TrueVal = SelectNode.getOperand(1);
- SDValue FalseVal = SelectNode.getOperand(2);
- SDValue BrOpcode = SelectNode.getOperand(3);
+ ConstantSDNode *TrueOp = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
+ ConstantSDNode *FalseOp = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
- // Check if true_val is constant 1 and false_val is constant 0
- ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueVal);
- ConstantSDNode *FalseConst = dyn_cast<ConstantSDNode>(FalseVal);
-
- if (!TrueConst || !FalseConst)
+ if (!TrueOp || !FalseOp)
return SDValue();
- if (!((TrueConst->isOne() && FalseConst->isZero()) ||
- (TrueConst->isZero() && FalseConst->isOne())))
+ // Only optimize if operands are {0, 1} or {1, 0}
+ if (!((TrueOp->isOne() && FalseOp->isZero()) ||
+ (TrueOp->isZero() && FalseOp->isOne())))
return SDValue();
- // Pattern matched! Create new SELECT_CC with swapped operands
+ // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to eliminate XOR.
+ // If original was SELECT_CC(cond, 1, 0, pred), create SELECT_CC(cond, 0, 1, pred).
+ // If original was SELECT_CC(cond, 0, 1, pred), create SELECT_CC(cond, 1, 0, pred).
SDLoc DL(N);
MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
- SDValue ZeroValue = DAG.getConstant(0, DL, XorVT);
- SDValue OneValue = DAG.getConstant(1, DL, XorVT);
-
- // Create new MachineSDNode: SELECT_CC_I4/8(cond, 0, 1, bropc)
- SDValue NewSelect = SDValue(
+
+ bool TrueOpIsOne = TrueOp->isOne();
+ return SDValue(
DAG.getMachineNode(
MachineOpc, DL, XorVT,
- {Cond, // Same condition
- TrueConst->isOne() ? ZeroValue : OneValue, // 0 (was false, now true)
- FalseConst->isZero() ? OneValue
- : ZeroValue, // 1 (was true, now false)
- BrOpcode}), // Same branch opcode
+ {SelectNode.getOperand(0),
+ DAG.getConstant(TrueOpIsOne ? 0 : 1, DL, XorVT),
+ DAG.getConstant(TrueOpIsOne ? 1 : 0, DL, XorVT),
+ SelectNode.getOperand(3)}),
0);
-
- return NewSelect;
}
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
@@ -17445,11 +17444,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
- case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
if (SDValue RetV = combineZextSetccWithZero(N, DCI.DAG))
return RetV;
[[fallthrough]];
+ case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
>From 9b632faea815805160be94fedf4853e17777d778 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 18 Feb 2026 19:00:51 +0000
Subject: [PATCH 5/9] clang format
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 46 +++++++++++----------
1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 6a30dcad7e095..91392035ac7b5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11362,20 +11362,22 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
: (OpVT == MVT::f64 ? PPC::XSTSTDCDP
: PPC::XSTSTDCSP);
// Lower __builtin_ppc_test_data_class(value, mask) to XSTSTDC* instruction.
- // The XSTSTDC* instructions test if a floating-point value matches any of the
- // data classes specified in the mask, setting CR field bits accordingly.
- // We need to extract the EQ bit (bit 2) from the CR field and convert it to
- // an integer result (1 if match, 0 if no match).
+ // The XSTSTDC* instructions test if a floating-point value matches any of
+ // the data classes specified in the mask, setting CR field bits
+ // accordingly. We need to extract the EQ bit (bit 2) from the CR field and
+ // convert it to an integer result (1 if match, 0 if no match).
//
// Note: Operands are swapped because XSTSTDC* expects (mask, value) but the
- // intrinsic provides (value, mask) as Op.getOperand(1) and Op.getOperand(2).
+ // intrinsic provides (value, mask) as Op.getOperand(1) and
+ // Op.getOperand(2).
SDValue TestDataClass =
SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32,
{Op.getOperand(2), Op.getOperand(1)}),
0);
if (Subtarget.isISA3_1()) {
// ISA 3.1+: Use SETBC instruction to directly convert CR bit to integer.
- // This is more efficient than the SELECT_CC approach used in earlier ISAs.
+ // This is more efficient than the SELECT_CC approach used in earlier
+ // ISAs.
SDValue SubRegIdx = DAG.getTargetConstant(PPC::sub_eq, dl, MVT::i32);
SDValue CRBit =
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
@@ -11386,12 +11388,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
// Pre-ISA 3.1: Use SELECT_CC to convert CR field to integer (1 or 0).
- return SDValue(DAG.getMachineNode(
- PPC::SELECT_CC_I4, dl, MVT::i32,
- {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32),
- DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
- 0);
+ return SDValue(
+ DAG.getMachineNode(PPC::SELECT_CC_I4, dl, MVT::i32,
+ {TestDataClass, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
}
case Intrinsic::ppc_fnmsub: {
EVT VT = Op.getOperand(1).getValueType();
@@ -17363,20 +17365,20 @@ static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
(TrueOp->isZero() && FalseOp->isOne())))
return SDValue();
- // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to eliminate XOR.
- // If original was SELECT_CC(cond, 1, 0, pred), create SELECT_CC(cond, 0, 1, pred).
- // If original was SELECT_CC(cond, 0, 1, pred), create SELECT_CC(cond, 1, 0, pred).
+ // Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
+ // eliminate XOR. If original was SELECT_CC(cond, 1, 0, pred), create
+ // SELECT_CC(cond, 0, 1, pred). If original was SELECT_CC(cond, 0, 1, pred),
+ // create SELECT_CC(cond, 1, 0, pred).
SDLoc DL(N);
MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
-
+
bool TrueOpIsOne = TrueOp->isOne();
return SDValue(
- DAG.getMachineNode(
- MachineOpc, DL, XorVT,
- {SelectNode.getOperand(0),
- DAG.getConstant(TrueOpIsOne ? 0 : 1, DL, XorVT),
- DAG.getConstant(TrueOpIsOne ? 1 : 0, DL, XorVT),
- SelectNode.getOperand(3)}),
+ DAG.getMachineNode(MachineOpc, DL, XorVT,
+ {SelectNode.getOperand(0),
+ DAG.getConstant(TrueOpIsOne ? 0 : 1, DL, XorVT),
+ DAG.getConstant(TrueOpIsOne ? 1 : 0, DL, XorVT),
+ SelectNode.getOperand(3)}),
0);
}
>From 17ad3c781eeb86d00c7b4c807073e73a17eda832 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 18 Feb 2026 19:16:50 +0000
Subject: [PATCH 6/9] git clang-format
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 91392035ac7b5..502d6b144528a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17262,7 +17262,7 @@ static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
if(!isNullConstant(RHS) && !isNullConstant(LHS))
- return SDValue();
+ return SDValue();
SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS ;
@@ -17284,14 +17284,13 @@ static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
// This keeps the value in i32 instead of converting to i1
SDLoc DL(N);
EVT VType = N->getValueType(0);
- SDValue NewNonNullConstant =
- DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
+ SDValue NewNonNullConstant = DAG.getZExtOrTrunc(NonNullConstant, DL, VType);
if (CC == ISD::SETNE)
return NewNonNullConstant;
SDValue One = DAG.getConstant(1, DL, VType);
- return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
+ return DAG.getNode(ISD::XOR, DL, VType, NewNonNullConstant, One);
}
return SDValue();
>From 9ff3b76abe5739e6676eda0bb1b4e2f9d71dc4c0 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 18 Feb 2026 20:05:46 +0000
Subject: [PATCH 7/9] clang format
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 502d6b144528a..604ff40bdef9b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17261,10 +17261,10 @@ static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
SDValue RHS = Src.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
- if(!isNullConstant(RHS) && !isNullConstant(LHS))
+ if (!isNullConstant(RHS) && !isNullConstant(LHS))
return SDValue();
- SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS ;
+ SDValue NonNullConstant = isNullConstant(RHS) ? LHS : RHS;
auto isZeroOrOne = [=](SDValue &V) {
if (V.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
>From 5747f2486a5fb5d60af9b584bf96f993bd6c2f07 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 19 Feb 2026 15:46:59 +0000
Subject: [PATCH 8/9] address comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 44 ++++++++++-----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 604ff40bdef9b..dc773be2d5751 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17223,28 +17223,28 @@ static SDValue DAGCombineAddc(SDNode *N,
return SDValue();
}
-/// Optimize zero-extension of setcc when the compared value is known to be 0
-/// or 1.
-///
-/// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
-/// -> zext(xor(Value, 1)) for seteq
-/// -> zext(Value) for setne
-///
-/// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
-/// by keeping the value in its original i32 type throughout.
-///
-/// Example:
-/// Before: zext(setcc(test_data_class(...), 0, seteq))
-/// // test_data_class returns 0 or 1 in i32
-/// // setcc converts i32 -> i1
-/// // zext converts i1 -> i64
-/// After: zext(xor(test_data_class(...), 1))
-/// // Stays in i32, then extends to i64
-///
-/// This is beneficial because:
-/// 1. Eliminates the setcc instruction
-/// 2. Avoids i32 -> i1 truncation
-/// 3. Keeps computation in native integer width
+// Optimize zero-extension of setcc when the compared value is known to be 0
+// or 1.
+//
+// Pattern: zext(setcc(Value, 0, seteq/setne)) where Value is 0 or 1
+// -> zext(xor(Value, 1)) for seteq
+// -> zext(Value) for setne
+//
+// This optimization avoids the i32 -> i1 -> i32/i64 conversion sequence
+// by keeping the value in its original i32 type throughout.
+//
+// Example:
+// Before: zext(setcc(test_data_class(...), 0, seteq))
+// // test_data_class returns 0 or 1 in i32
+// // setcc converts i32 -> i1
+// // zext converts i1 -> i64
+// After: zext(xor(test_data_class(...), 1))
+// // Stays in i32, then extends to i64
+//
+// This is beneficial because:
+// 1. Eliminates the setcc instruction
+// 2. Avoids i32 -> i1 truncation
+// 3. Keeps computation in native integer width
static SDValue combineZextSetccWithZero(SDNode *N, SelectionDAG &DAG) {
// Check if this is a zero_extend
>From 55fe04a328cf654697a80f81d37c34b591d885e4 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 19 Feb 2026 19:42:42 +0000
Subject: [PATCH 9/9] nit: change variable name
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dc773be2d5751..905864a92a6d8 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17353,15 +17353,15 @@ static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
if (SelectNode.getNumOperands() != 4)
return SDValue();
- ConstantSDNode *TrueOp = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
- ConstantSDNode *FalseOp = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
+ ConstantSDNode *ConstOp1 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(1));
+ ConstantSDNode *ConstOp2 = dyn_cast<ConstantSDNode>(SelectNode.getOperand(2));
- if (!TrueOp || !FalseOp)
+ if (!ConstOp1 || !ConstOp2)
return SDValue();
// Only optimize if operands are {0, 1} or {1, 0}
- if (!((TrueOp->isOne() && FalseOp->isZero()) ||
- (TrueOp->isZero() && FalseOp->isOne())))
+ if (!((ConstOp1->isOne() && ConstOp2->isZero()) ||
+ (ConstOp1->isZero() && ConstOp2->isOne())))
return SDValue();
// Pattern matched! Create new SELECT_CC with swapped 0/1 operands to
@@ -17371,12 +17371,12 @@ static SDValue combineXorSelectCC(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
MachineOpc = (XorVT == MVT::i32) ? PPC::SELECT_CC_I4 : PPC::SELECT_CC_I8;
- bool TrueOpIsOne = TrueOp->isOne();
+ bool ConstOp1IsOne = ConstOp1->isOne();
return SDValue(
DAG.getMachineNode(MachineOpc, DL, XorVT,
{SelectNode.getOperand(0),
- DAG.getConstant(TrueOpIsOne ? 0 : 1, DL, XorVT),
- DAG.getConstant(TrueOpIsOne ? 1 : 0, DL, XorVT),
+ DAG.getConstant(ConstOp1IsOne ? 0 : 1, DL, XorVT),
+ DAG.getConstant(ConstOp1IsOne ? 1 : 0, DL, XorVT),
SelectNode.getOperand(3)}),
0);
}
More information about the llvm-commits
mailing list