[llvm] [PowerPC] fix Inefficient std::bit_floor(x) (PR #183361)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 06:19:18 PDT 2026
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/183361
>From a9d1301c7f179e6977c0f8e0604e685efe6d8b42 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 24 Feb 2026 21:13:00 +0000
Subject: [PATCH 01/10] implement bit_floor
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 50 +++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 84d26448a7f4f..371c67e80d250 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17204,6 +17204,54 @@ static SDValue DAGCombineAddc(SDNode *N,
}
return SDValue();
}
+/// Optimize the bitfloor pattern for PowerPC.
+/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
+/// Into: srl MinSignedValue, (ctlz X)
+///
+/// This is safe on PowerPC because the srw instruction returns 0 when the
+/// shift amount is == bitwidth, which matches the behavior we need for X=0.
+static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::SELECT_CC && "Expected SELECT_CC node");
+
+ // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
+ SDValue CmpLHS = N->getOperand(0);
+ SDValue CmpRHS = N->getOperand(1);
+ SDValue TrueVal = N->getOperand(2);
+ SDValue FalseVal = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ // Check if condition is (X == 0)
+ if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
+ return SDValue();
+
+ // Check if TrueVal is constant 0
+ if (!isNullConstant(TrueVal))
+ return SDValue();
+
+ // Check if FalseVal is (srl MinSignedValue, ShiftAmt)
+ if (FalseVal.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShiftVal = FalseVal.getOperand(0);
+ SDValue ShiftAmt = FalseVal.getOperand(1);
+
+ // Check if ShiftVal is MinSignedValue
+ auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
+ if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
+ return SDValue();
+
+ // Check if ShiftAmt is (ctlz CmpLHS
+ if (ShiftAmt.getOpcode() != ISD::CTLZ)
+ return SDValue();
+
+ SDValue CtlzArg = ShiftAmt.getOperand(0);
+
+ // Check if ctlz operates on the same value as the comparison
+ if (CtlzArg != CmpLHS)
+ return SDValue();
+
+ return FalseVal;
+}
// Optimize zero-extension of setcc when the compared value is known to be 0
// or 1.
@@ -17441,6 +17489,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return CSCC;
[[fallthrough]];
case ISD::SELECT_CC:
+ if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
+ return V;
return DAGCombineTruncBoolExt(N, DCI);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
>From a4acb44532cfe401dccb1a668cecd5a5bcd53bd7 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 25 Feb 2026 11:02:32 -0500
Subject: [PATCH 02/10] minor change
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 371c67e80d250..8b064673ebf7c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17204,7 +17204,8 @@ static SDValue DAGCombineAddc(SDNode *N,
}
return SDValue();
}
-/// Optimize the bitfloor pattern for PowerPC.
+
+/// Optimize the bitfloor(X) pattern for PowerPC.
/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
/// Into: srl MinSignedValue, (ctlz X)
///
>From 3897a5201a095b217af7ae1cf5054917a5afae54 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 25 Feb 2026 18:35:13 +0000
Subject: [PATCH 03/10] support 64bit_floor
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 22 ++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8b064673ebf7c..258ab65a88f89 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17212,7 +17212,8 @@ static SDValue DAGCombineAddc(SDNode *N,
/// This is safe on PowerPC because the srw instruction returns 0 when the
/// shift amount is == bitwidth, which matches the behavior we need for X=0.
static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::SELECT_CC && "Expected SELECT_CC node");
+ if (N->getOpcode() != ISD::SELECT_CC)
+ return SDValue();
// SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
SDValue CmpLHS = N->getOperand(0);
@@ -17241,11 +17242,22 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
return SDValue();
- // Check if ShiftAmt is (ctlz CmpLHS
- if (ShiftAmt.getOpcode() != ISD::CTLZ)
- return SDValue();
+ SDValue CtlzArg;
+ // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
+ if (ShiftAmt.getOpcode() != ISD::CTLZ) {
+ // Look through truncate if present (for i64 ctlz truncated to i32 shift
+ // amount)
+ if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+
+ SDValue CtlzNode = ShiftAmt.getOperand(0);
- SDValue CtlzArg = ShiftAmt.getOperand(0);
+ if (CtlzNode.getOpcode() != ISD::CTLZ)
+ return SDValue();
+
+ CtlzArg = CtlzNode.getOperand(0);
+ } else
+ CtlzArg = ShiftAmt.getOperand(0);
// Check if ctlz operates on the same value as the comparison
if (CtlzArg != CmpLHS)
>From 86cbf190c4be7d84edd512b50f08118f2a360d67 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 2 Mar 2026 16:45:54 +0000
Subject: [PATCH 04/10] add more comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 258ab65a88f89..816b535791eca 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17231,6 +17231,9 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
return SDValue();
// Check if FalseVal is (srl MinSignedValue, ShiftAmt)
+ // We check for ISD::SRL here (not PPCISD::SRL) because this runs during
+ // DAGCombine before instruction selection. PowerPC's SRD/SRW instructions
+ // guarantee that a shift by bitwidth returns 0, which matches our needs.
if (FalseVal.getOpcode() != ISD::SRL)
return SDValue();
>From 5a0794ff054147e80eda4d49197e89a70f8d1706 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 2 Mar 2026 20:41:53 +0000
Subject: [PATCH 05/10] modify the bit_floor.ll based on the new behavior
---
llvm/test/CodeGen/PowerPC/bit_floor.ll | 44 ++++++++++----------------
1 file changed, 17 insertions(+), 27 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/bit_floor.ll b/llvm/test/CodeGen/PowerPC/bit_floor.ll
index 916ef0a9451c4..2a2ef30eb5630 100644
--- a/llvm/test/CodeGen/PowerPC/bit_floor.ll
+++ b/llvm/test/CodeGen/PowerPC/bit_floor.ll
@@ -6,29 +6,23 @@
define noundef range(i32 0, -2147483647) i32 @_Z10bitfloor32j(i32 noundef %x) local_unnamed_addr {
; AIX32-LABEL: _Z10bitfloor32j:
; AIX32: # %bb.0: # %entry
-; AIX32-NEXT: cntlzw r4, r3
-; AIX32-NEXT: lis r5, -32768
-; AIX32-NEXT: cmplwi r3, 0
-; AIX32-NEXT: srw r4, r5, r4
-; AIX32-NEXT: iseleq r3, 0, r4
+; AIX32-NEXT: cntlzw r3, r3
+; AIX32-NEXT: lis r4, -32768
+; AIX32-NEXT: srw r3, r4, r3
; AIX32-NEXT: blr
;
; AIX64-LABEL: _Z10bitfloor32j:
; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: cntlzw r4, r3
-; AIX64-NEXT: lis r5, -32768
-; AIX64-NEXT: cmplwi r3, 0
-; AIX64-NEXT: srw r4, r5, r4
-; AIX64-NEXT: iseleq r3, 0, r4
+; AIX64-NEXT: cntlzw r3, r3
+; AIX64-NEXT: lis r4, -32768
+; AIX64-NEXT: srw r3, r4, r3
; AIX64-NEXT: blr
;
; LNX64-LABEL: _Z10bitfloor32j:
; LNX64: # %bb.0: # %entry
-; LNX64-NEXT: cntlzw r4, r3
-; LNX64-NEXT: cmplwi r3, 0
-; LNX64-NEXT: lis r5, -32768
-; LNX64-NEXT: srw r4, r5, r4
-; LNX64-NEXT: iseleq r3, 0, r4
+; LNX64-NEXT: cntlzw r3, r3
+; LNX64-NEXT: lis r4, -32768
+; LNX64-NEXT: srw r3, r4, r3
; LNX64-NEXT: blr
entry:
%cmp.i = icmp eq i32 %x, 0
@@ -62,22 +56,18 @@ define noundef range(i64 0, -9223372036854775807) i64 @_Z10bitfloor64y(i64 nound
;
; AIX64-LABEL: _Z10bitfloor64y:
; AIX64: # %bb.0: # %entry
-; AIX64-NEXT: li r5, 1
-; AIX64-NEXT: cntlzd r4, r3
-; AIX64-NEXT: cmpldi r3, 0
-; AIX64-NEXT: rldic r5, r5, 63, 0
-; AIX64-NEXT: srd r4, r5, r4
-; AIX64-NEXT: iseleq r3, 0, r4
+; AIX64-NEXT: li r4, 1
+; AIX64-NEXT: cntlzd r3, r3
+; AIX64-NEXT: rldic r4, r4, 63, 0
+; AIX64-NEXT: srd r3, r4, r3
; AIX64-NEXT: blr
;
; LNX64-LABEL: _Z10bitfloor64y:
; LNX64: # %bb.0: # %entry
-; LNX64-NEXT: li r5, 1
-; LNX64-NEXT: cntlzd r4, r3
-; LNX64-NEXT: cmpldi r3, 0
-; LNX64-NEXT: rldic r5, r5, 63, 0
-; LNX64-NEXT: srd r4, r5, r4
-; LNX64-NEXT: iseleq r3, 0, r4
+; LNX64-NEXT: li r4, 1
+; LNX64-NEXT: cntlzd r3, r3
+; LNX64-NEXT: rldic r4, r4, 63, 0
+; LNX64-NEXT: srd r3, r4, r3
; LNX64-NEXT: blr
entry:
%cmp.i = icmp eq i64 %x, 0
>From 26ddac2bb60014de7bdce0d2d22954c140e83f19 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 3 Mar 2026 20:51:52 +0000
Subject: [PATCH 06/10] replace ISD::SRL with PPCISD::SRL
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 816b535791eca..77d70108777f6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,11 +17230,11 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (!isNullConstant(TrueVal))
return SDValue();
- // Check if FalseVal is (srl MinSignedValue, ShiftAmt)
- // We check for ISD::SRL here (not PPCISD::SRL) because this runs during
- // DAGCombine before instruction selection. PowerPC's SRD/SRW instructions
- // guarantee that a shift by bitwidth returns 0, which matches our needs.
- if (FalseVal.getOpcode() != ISD::SRL)
+ // DAGCombine before instruction selection. We also require hasOneUse() to
+ // ensure we can safely replace ISD::SRL with PPCISD::SRL without affecting
+ // other users. PowerPC's SRD/SRW instructions guarantee that a shift by
+ // bitwidth returns 0, which matches our needs for the bitfloor(0) case.
+ if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
return SDValue();
SDValue ShiftVal = FalseVal.getOperand(0);
@@ -17266,7 +17266,13 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (CtlzArg != CmpLHS)
return SDValue();
- return FalseVal;
+ // Replace ISD::SRL with PPCISD::SRL to ensure well-defined behavior.
+ // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
+ // which is exactly what we need for the bitfloor(0) case.
+ SDLoc DL(N);
+ SDValue PPCSrl = DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(),
+ ShiftVal, ShiftAmt);
+ return PPCSrl;
}
// Optimize zero-extension of setcc when the compared value is known to be 0
>From db9c1e95c0c452f1e1c42306b2b5240592baff12 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 3 Mar 2026 21:02:50 +0000
Subject: [PATCH 07/10] reorganize the comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 77d70108777f6..72284415c4743 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,10 +17230,8 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (!isNullConstant(TrueVal))
return SDValue();
- // DAGCombine before instruction selection. We also require hasOneUse() to
- // ensure we can safely replace ISD::SRL with PPCISD::SRL without affecting
- // other users. PowerPC's SRD/SRW instructions guarantee that a shift by
- // bitwidth returns 0, which matches our needs for the bitfloor(0) case.
+ // We also require hasOneUse() to ensure that we can safely replace ISD::SRL
+ // with PPCISD::SRL without affecting other users later.
if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
return SDValue();
>From 1a9fbd24fd23871c9c4a0692b9094c783aeb0ec5 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 4 Mar 2026 14:30:51 +0000
Subject: [PATCH 08/10] git clang format
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 72284415c4743..37f278e2d93f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17268,8 +17268,8 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
// On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
// which is exactly what we need for the bitfloor(0) case.
SDLoc DL(N);
- SDValue PPCSrl = DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(),
- ShiftVal, ShiftAmt);
+ SDValue PPCSrl =
+ DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
return PPCSrl;
}
>From aca69fb977323e732dfc6b5aa9de16a2b2fe7909 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 4 Mar 2026 15:37:00 +0000
Subject: [PATCH 09/10] git clang format
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 37f278e2d93f7..dce16d5eefe01 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17270,7 +17270,7 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue PPCSrl =
DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
- return PPCSrl;
+ return PPCSrl;
}
// Optimize zero-extension of setcc when the compared value is known to be 0
>From b04ab785b6d7aec8dfb30440661f29a5d8af5fae Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 1 Apr 2026 13:31:46 +0000
Subject: [PATCH 10/10] address comment
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dce16d5eefe01..43b4cd83bf11b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,8 +17230,9 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (!isNullConstant(TrueVal))
return SDValue();
- // We also require hasOneUse() to ensure that we can safely replace ISD::SRL
- // with PPCISD::SRL without affecting other users later.
+ // This combine is replacing a select_cc with a PPC srl, not an srl with a
+ // PPC srl. If the original srl had multiple uses it would just remain in the
+ // code. This is at most a performance consideration.
if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
return SDValue();
@@ -17251,20 +17252,26 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
return SDValue();
+ // Verify the truncate target type is appropriate for shift amount (i32, not
+ // i1 or other)
+ if (ShiftAmt.getValueType() != MVT::i32)
+ return SDValue();
+
SDValue CtlzNode = ShiftAmt.getOperand(0);
if (CtlzNode.getOpcode() != ISD::CTLZ)
return SDValue();
CtlzArg = CtlzNode.getOperand(0);
- } else
+ } else {
CtlzArg = ShiftAmt.getOperand(0);
+ }
// Check if ctlz operates on the same value as the comparison
if (CtlzArg != CmpLHS)
return SDValue();
- // Replace ISD::SRL with PPCISD::SRL to ensure well-defined behavior.
+ // Using PPCISD::SRL to ensure well-defined behavior.
// On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
// which is exactly what we need for the bitfloor(0) case.
SDLoc DL(N);
More information about the llvm-commits
mailing list