[llvm] [PowerPC] fix Inefficient std::bit_floor(x) (PR #183361)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 06:19:18 PDT 2026


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/183361

>From a9d1301c7f179e6977c0f8e0604e685efe6d8b42 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 24 Feb 2026 21:13:00 +0000
Subject: [PATCH 01/10] implement bit_floor

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 50 +++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 84d26448a7f4f..371c67e80d250 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17204,6 +17204,54 @@ static SDValue DAGCombineAddc(SDNode *N,
   }
   return SDValue();
 }
+/// Optimize the bitfloor pattern for PowerPC.
+/// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
+/// Into: srl MinSignedValue, (ctlz X)
+///
+/// This is safe on PowerPC because the srw instruction returns 0 when the
+/// shift amount is == bitwidth, which matches the behavior we need for X=0.
+static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::SELECT_CC && "Expected SELECT_CC node");
+
+  // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
+  SDValue CmpLHS = N->getOperand(0);
+  SDValue CmpRHS = N->getOperand(1);
+  SDValue TrueVal = N->getOperand(2);
+  SDValue FalseVal = N->getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+  // Check if condition is (X == 0)
+  if (CC != ISD::SETEQ || !isNullConstant(CmpRHS))
+    return SDValue();
+
+  // Check if TrueVal is constant 0
+  if (!isNullConstant(TrueVal))
+    return SDValue();
+
+  // Check if FalseVal is (srl MinSignedValue, ShiftAmt)
+  if (FalseVal.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  SDValue ShiftVal = FalseVal.getOperand(0);
+  SDValue ShiftAmt = FalseVal.getOperand(1);
+
+  // Check if ShiftVal is MinSignedValue
+  auto *ShiftConst = dyn_cast<ConstantSDNode>(ShiftVal);
+  if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
+    return SDValue();
+
+  // Check if ShiftAmt is (ctlz CmpLHS
+  if (ShiftAmt.getOpcode() != ISD::CTLZ)
+    return SDValue();
+
+  SDValue CtlzArg = ShiftAmt.getOperand(0);
+
+  // Check if ctlz operates on the same value as the comparison
+  if (CtlzArg != CmpLHS)
+    return SDValue();
+
+  return FalseVal;
+}
 
 // Optimize zero-extension of setcc when the compared value is known to be 0
 // or 1.
@@ -17441,6 +17489,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
       return CSCC;
     [[fallthrough]];
   case ISD::SELECT_CC:
+    if (SDValue V = combineSELECT_CCBitFloor(N, DAG))
+      return V;
     return DAGCombineTruncBoolExt(N, DCI);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:

>From a4acb44532cfe401dccb1a668cecd5a5bcd53bd7 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 25 Feb 2026 11:02:32 -0500
Subject: [PATCH 02/10] minor change

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 371c67e80d250..8b064673ebf7c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17204,7 +17204,8 @@ static SDValue DAGCombineAddc(SDNode *N,
   }
   return SDValue();
 }
-/// Optimize the bitfloor pattern for PowerPC.
+
+/// Optimize the bitfloor(X) pattern for PowerPC.
 /// Transforms: select_cc X, 0, 0, (srl MinSignedValue, (ctlz X)), seteq
 /// Into: srl MinSignedValue, (ctlz X)
 ///

>From 3897a5201a095b217af7ae1cf5054917a5afae54 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 25 Feb 2026 18:35:13 +0000
Subject: [PATCH 03/10] support 64bit_floor

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 22 ++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8b064673ebf7c..258ab65a88f89 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17212,7 +17212,8 @@ static SDValue DAGCombineAddc(SDNode *N,
 /// This is safe on PowerPC because the srw instruction returns 0 when the
 /// shift amount is == bitwidth, which matches the behavior we need for X=0.
 static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
-  assert(N->getOpcode() == ISD::SELECT_CC && "Expected SELECT_CC node");
+  if (N->getOpcode() != ISD::SELECT_CC)
+    return SDValue();
 
   // SELECT_CC operands: LHS, RHS, TrueVal, FalseVal, CC
   SDValue CmpLHS = N->getOperand(0);
@@ -17241,11 +17242,22 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   if (!ShiftConst || !ShiftConst->getAPIntValue().isMinSignedValue())
     return SDValue();
 
-  // Check if ShiftAmt is (ctlz CmpLHS
-  if (ShiftAmt.getOpcode() != ISD::CTLZ)
-    return SDValue();
+  SDValue CtlzArg;
+  // Check if ShiftAmt is (ctlz CmpLHS) or (truncate (ctlz ...))
+  if (ShiftAmt.getOpcode() != ISD::CTLZ) {
+    // Look through truncate if present (for i64 ctlz truncated to i32 shift
+    // amount)
+    if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
+      return SDValue();
+
+    SDValue CtlzNode = ShiftAmt.getOperand(0);
 
-  SDValue CtlzArg = ShiftAmt.getOperand(0);
+    if (CtlzNode.getOpcode() != ISD::CTLZ)
+      return SDValue();
+
+    CtlzArg = CtlzNode.getOperand(0);
+  } else
+    CtlzArg = ShiftAmt.getOperand(0);
 
   // Check if ctlz operates on the same value as the comparison
   if (CtlzArg != CmpLHS)

>From 86cbf190c4be7d84edd512b50f08118f2a360d67 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 2 Mar 2026 16:45:54 +0000
Subject: [PATCH 04/10] add more comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 258ab65a88f89..816b535791eca 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17231,6 +17231,9 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
 
   // Check if FalseVal is (srl MinSignedValue, ShiftAmt)
+  // We check for ISD::SRL here (not PPCISD::SRL) because this runs during
+  // DAGCombine before instruction selection. PowerPC's SRD/SRW instructions
+  // guarantee that a shift by bitwidth returns 0, which matches our needs.
   if (FalseVal.getOpcode() != ISD::SRL)
     return SDValue();
 

>From 5a0794ff054147e80eda4d49197e89a70f8d1706 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Mon, 2 Mar 2026 20:41:53 +0000
Subject: [PATCH 05/10] modify the bit_floor.ll based on the new behaviour

---
 llvm/test/CodeGen/PowerPC/bit_floor.ll | 44 ++++++++++----------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/bit_floor.ll b/llvm/test/CodeGen/PowerPC/bit_floor.ll
index 916ef0a9451c4..2a2ef30eb5630 100644
--- a/llvm/test/CodeGen/PowerPC/bit_floor.ll
+++ b/llvm/test/CodeGen/PowerPC/bit_floor.ll
@@ -6,29 +6,23 @@
 define noundef range(i32 0, -2147483647) i32 @_Z10bitfloor32j(i32 noundef %x) local_unnamed_addr {
 ; AIX32-LABEL: _Z10bitfloor32j:
 ; AIX32:       # %bb.0: # %entry
-; AIX32-NEXT:    cntlzw r4, r3
-; AIX32-NEXT:    lis r5, -32768
-; AIX32-NEXT:    cmplwi r3, 0
-; AIX32-NEXT:    srw r4, r5, r4
-; AIX32-NEXT:    iseleq r3, 0, r4
+; AIX32-NEXT:    cntlzw r3, r3
+; AIX32-NEXT:    lis r4, -32768
+; AIX32-NEXT:    srw r3, r4, r3
 ; AIX32-NEXT:    blr
 ;
 ; AIX64-LABEL: _Z10bitfloor32j:
 ; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    cntlzw r4, r3
-; AIX64-NEXT:    lis r5, -32768
-; AIX64-NEXT:    cmplwi r3, 0
-; AIX64-NEXT:    srw r4, r5, r4
-; AIX64-NEXT:    iseleq r3, 0, r4
+; AIX64-NEXT:    cntlzw r3, r3
+; AIX64-NEXT:    lis r4, -32768
+; AIX64-NEXT:    srw r3, r4, r3
 ; AIX64-NEXT:    blr
 ;
 ; LNX64-LABEL: _Z10bitfloor32j:
 ; LNX64:       # %bb.0: # %entry
-; LNX64-NEXT:    cntlzw r4, r3
-; LNX64-NEXT:    cmplwi r3, 0
-; LNX64-NEXT:    lis r5, -32768
-; LNX64-NEXT:    srw r4, r5, r4
-; LNX64-NEXT:    iseleq r3, 0, r4
+; LNX64-NEXT:    cntlzw r3, r3
+; LNX64-NEXT:    lis r4, -32768
+; LNX64-NEXT:    srw r3, r4, r3
 ; LNX64-NEXT:    blr
 entry:
   %cmp.i = icmp eq i32 %x, 0
@@ -62,22 +56,18 @@ define noundef range(i64 0, -9223372036854775807) i64 @_Z10bitfloor64y(i64 nound
 ;
 ; AIX64-LABEL: _Z10bitfloor64y:
 ; AIX64:       # %bb.0: # %entry
-; AIX64-NEXT:    li r5, 1
-; AIX64-NEXT:    cntlzd r4, r3
-; AIX64-NEXT:    cmpldi r3, 0
-; AIX64-NEXT:    rldic r5, r5, 63, 0
-; AIX64-NEXT:    srd r4, r5, r4
-; AIX64-NEXT:    iseleq r3, 0, r4
+; AIX64-NEXT:    li r4, 1
+; AIX64-NEXT:    cntlzd r3, r3
+; AIX64-NEXT:    rldic r4, r4, 63, 0
+; AIX64-NEXT:    srd r3, r4, r3
 ; AIX64-NEXT:    blr
 ;
 ; LNX64-LABEL: _Z10bitfloor64y:
 ; LNX64:       # %bb.0: # %entry
-; LNX64-NEXT:    li r5, 1
-; LNX64-NEXT:    cntlzd r4, r3
-; LNX64-NEXT:    cmpldi r3, 0
-; LNX64-NEXT:    rldic r5, r5, 63, 0
-; LNX64-NEXT:    srd r4, r5, r4
-; LNX64-NEXT:    iseleq r3, 0, r4
+; LNX64-NEXT:    li r4, 1
+; LNX64-NEXT:    cntlzd r3, r3
+; LNX64-NEXT:    rldic r4, r4, 63, 0
+; LNX64-NEXT:    srd r3, r4, r3
 ; LNX64-NEXT:    blr
 entry:
   %cmp.i = icmp eq i64 %x, 0

>From 26ddac2bb60014de7bdce0d2d22954c140e83f19 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 3 Mar 2026 20:51:52 +0000
Subject: [PATCH 06/10] replace ISD::SRL with PPCISD::SRL

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 816b535791eca..77d70108777f6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,11 +17230,11 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   if (!isNullConstant(TrueVal))
     return SDValue();
 
-  // Check if FalseVal is (srl MinSignedValue, ShiftAmt)
-  // We check for ISD::SRL here (not PPCISD::SRL) because this runs during
-  // DAGCombine before instruction selection. PowerPC's SRD/SRW instructions
-  // guarantee that a shift by bitwidth returns 0, which matches our needs.
-  if (FalseVal.getOpcode() != ISD::SRL)
+  // DAGCombine before instruction selection. We also require hasOneUse() to
+  // ensure we can safely replace ISD::SRL with PPCISD::SRL without affecting
+  // other users. PowerPC's SRD/SRW instructions guarantee that a shift by
+  // bitwidth returns 0, which matches our needs for the bitfloor(0) case.
+  if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
     return SDValue();
 
   SDValue ShiftVal = FalseVal.getOperand(0);
@@ -17266,7 +17266,13 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   if (CtlzArg != CmpLHS)
     return SDValue();
 
-  return FalseVal;
+  // Replace ISD::SRL with PPCISD::SRL to ensure well-defined behavior.
+  // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
+  // which is exactly what we need for the bitfloor(0) case.
+  SDLoc DL(N);
+  SDValue PPCSrl = DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(),
+                               ShiftVal, ShiftAmt);
+  return PPCSrl; 
 }
 
 // Optimize zero-extension of setcc when the compared value is known to be 0

>From db9c1e95c0c452f1e1c42306b2b5240592baff12 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 3 Mar 2026 21:02:50 +0000
Subject: [PATCH 07/10] reorganize the comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 77d70108777f6..72284415c4743 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,10 +17230,8 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   if (!isNullConstant(TrueVal))
     return SDValue();
 
-  // DAGCombine before instruction selection. We also require hasOneUse() to
-  // ensure we can safely replace ISD::SRL with PPCISD::SRL without affecting
-  // other users. PowerPC's SRD/SRW instructions guarantee that a shift by
-  // bitwidth returns 0, which matches our needs for the bitfloor(0) case.
+  // We also require hasOneUse()  to ensure that we can safely replace ISD::SRL
+  // with PPCISD::SRL without affecting other users later.
   if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
     return SDValue();
 

>From 1a9fbd24fd23871c9c4a0692b9094c783aeb0ec5 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 4 Mar 2026 14:30:51 +0000
Subject: [PATCH 08/10] git clang format

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 72284415c4743..37f278e2d93f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17268,8 +17268,8 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
   // which is exactly what we need for the bitfloor(0) case.
   SDLoc DL(N);
-  SDValue PPCSrl = DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(),
-                               ShiftVal, ShiftAmt);
+  SDValue PPCSrl =
+      DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
   return PPCSrl; 
 }
 

>From aca69fb977323e732dfc6b5aa9de16a2b2fe7909 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 4 Mar 2026 15:37:00 +0000
Subject: [PATCH 09/10] git clang format

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 37f278e2d93f7..dce16d5eefe01 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17270,7 +17270,7 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue PPCSrl =
       DAG.getNode(PPCISD::SRL, DL, FalseVal.getValueType(), ShiftVal, ShiftAmt);
-  return PPCSrl; 
+  return PPCSrl;
 }
 
 // Optimize zero-extension of setcc when the compared value is known to be 0

>From b04ab785b6d7aec8dfb30440661f29a5d8af5fae Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 1 Apr 2026 13:31:46 +0000
Subject: [PATCH 10/10] address comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dce16d5eefe01..43b4cd83bf11b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17230,8 +17230,9 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
   if (!isNullConstant(TrueVal))
     return SDValue();
 
-  // We also require hasOneUse()  to ensure that we can safely replace ISD::SRL
-  // with PPCISD::SRL without affecting other users later.
+  // This combine is replacing a select_cc with a PPC srl, not an srl with a
+  // PPC srl. If the original srl had multiple uses it would just remain in the
+  // code. This is at most a performance consideration.
   if (FalseVal.getOpcode() != ISD::SRL || !FalseVal.hasOneUse())
     return SDValue();
 
@@ -17251,20 +17252,26 @@ static SDValue combineSELECT_CCBitFloor(SDNode *N, SelectionDAG &DAG) {
     if (ShiftAmt.getOpcode() != ISD::TRUNCATE)
       return SDValue();
 
+    // Verify the truncate target type is appropriate for shift amount (i32, not
+    // i1 or other)
+    if (ShiftAmt.getValueType() != MVT::i32)
+      return SDValue();
+
     SDValue CtlzNode = ShiftAmt.getOperand(0);
 
     if (CtlzNode.getOpcode() != ISD::CTLZ)
       return SDValue();
 
     CtlzArg = CtlzNode.getOperand(0);
-  } else
+  } else {
     CtlzArg = ShiftAmt.getOperand(0);
+  }
 
   // Check if ctlz operates on the same value as the comparison
   if (CtlzArg != CmpLHS)
     return SDValue();
 
-  // Replace ISD::SRL with PPCISD::SRL to ensure well-defined behavior.
+  // Use PPCISD::SRL to ensure well-defined behavior.
   // On PowerPC, PPCISD::SRL guarantees that shift by bitwidth returns 0,
   // which is exactly what we need for the bitfloor(0) case.
   SDLoc DL(N);



More information about the llvm-commits mailing list