[llvm] [InstCombine] Drop nowrap flags in `foldBitCeil` (PR #125817)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 4 23:46:25 PST 2025


https://github.com/dtcxzyw created https://github.com/llvm/llvm-project/pull/125817

For convenience this patch drops nsw for `sub`. It also allows this fold with `ctlz_zero_undef`.
Alive2: https://alive2.llvm.org/ce/z/VmvqSt


>From b36a0cd9d4beee5ddc8af370b13ea7d5fcd970e5 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 5 Feb 2025 15:08:16 +0800
Subject: [PATCH 1/3] [InstCombine] Add pre-commit tests. NFC.

---
 llvm/test/Transforms/InstCombine/bit_ceil.ll | 41 ++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index a2e27dfd6f64d36..17582b31baa1292 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -337,6 +337,47 @@ define i32 @test_drop_range_attr(i32 %x) {
   ret i32 %sel
 }
 
+define i32 @bit_ceil_plus_nsw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nsw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
+; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %sub = add nsw i32 %x, 1
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+  %sub2 = sub nuw nsw i32 32, %ctlz
+  %shl = shl nuw i32 1, %sub2
+  %ult = icmp ult i32 %x, 2147483647
+  %sel = select i1 %ult, i32 %shl, i32 1
+  ret i32 %sel
+}
+
+define i32 @bit_ceil_plus_nuw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nuw(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUB:%.*]] = add nuw i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 true)
+; CHECK-NEXT:    [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB2]]
+; CHECK-NEXT:    [[ULT:%.*]] = icmp ult i32 [[X]], 2147483647
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %sub = add nuw i32 %x, 1
+  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+  %sub2 = sub nuw nsw i32 32, %ctlz
+  %shl = shl nuw i32 1, %sub2
+  %ult = icmp ult i32 %x, 2147483647
+  %sel = select i1 %ult, i32 %shl, i32 1
+  ret i32 %sel
+}
+
 declare i32 @llvm.ctlz.i32(i32, i1 immarg)
 declare i64 @llvm.ctlz.i64(i64, i1 immarg)
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

>From ae148b5275250b12886495b868cb32faef1fcbfb Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 5 Feb 2025 15:09:38 +0800
Subject: [PATCH 2/3] [InstCombine] Drop nowrap flags in
 `isSafeToRemoveBitCeilSelect`

---
 .../Transforms/InstCombine/InstCombineSelect.cpp  | 15 +++++++++------
 llvm/test/Transforms/InstCombine/bit_ceil.ll      |  8 ++++----
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 382078e85a17b6d..842a1e16ea43a36 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3432,7 +3432,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
 static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                         const APInt *Cond1, Value *CtlzOp,
                                         unsigned BitWidth,
-                                        bool &ShouldDropNUW) {
+                                        bool &ShouldDropNoWrap) {
   // The challenge in recognizing std::bit_ceil(X) is that the operand is used
   // for the CTLZ proper and select condition, each possibly with some
   // operation like add and sub.
@@ -3455,7 +3455,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
   ConstantRange CR = ConstantRange::makeExactICmpRegion(
       CmpInst::getInversePredicate(Pred), *Cond1);
 
-  ShouldDropNUW = false;
+  ShouldDropNoWrap = false;
 
   // Match the operation that's used to compute CtlzOp from CommonAncestor.  If
   // CtlzOp == CommonAncestor, return true as no operation is needed.  If a
@@ -3466,11 +3466,12 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
     if (CtlzOp == CommonAncestor)
       return true;
     if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {
+      ShouldDropNoWrap = true;
       CR = CR.add(*C);
       return true;
     }
     if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
-      ShouldDropNUW = true;
+      ShouldDropNoWrap = true;
       CR = ConstantRange(*C).sub(CR);
       return true;
     }
@@ -3541,7 +3542,7 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
     Pred = CmpInst::getInversePredicate(Pred);
   }
 
-  bool ShouldDropNUW;
+  bool ShouldDropNoWrap;
 
   if (!match(FalseVal, m_One()) ||
       !match(TrueVal,
@@ -3549,11 +3550,13 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
                                                     m_Value(Ctlz)))))) ||
       !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
       !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
-                                   ShouldDropNUW))
+                                   ShouldDropNoWrap))
     return nullptr;
 
-  if (ShouldDropNUW)
+  if (ShouldDropNoWrap) {
     cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+    cast<Instruction>(CtlzOp)->setHasNoSignedWrap(false);
+  }
 
   // Build 1 << (-CTLZ & (BitWidth-1)).  The negation likely corresponds to a
   // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 17582b31baa1292..50c92529f048717 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -302,9 +302,9 @@ define i32 @pr91691(i32 %0) {
   ret i32 %7
 }
 
-define i32 @pr91691_keep_nsw(i32 %0) {
-; CHECK-LABEL: @pr91691_keep_nsw(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+define i32 @pr91691_drop_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_drop_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
@@ -340,7 +340,7 @@ define i32 @test_drop_range_attr(i32 %x) {
 define i32 @bit_ceil_plus_nsw(i32 %x) {
 ; CHECK-LABEL: @bit_ceil_plus_nsw(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
 ; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31

>From ab0cbc94584baddd888d7f2b7639f9e5de7af0d9 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Wed, 5 Feb 2025 15:13:24 +0800
Subject: [PATCH 3/3] [InstCombine] Allow fold with `is_zero_poison = true`

---
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp |  4 +++-
 llvm/test/Transforms/InstCombine/bit_ceil.ll          | 11 +++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 842a1e16ea43a36..2e14145aef88411 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3548,7 +3548,7 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
       !match(TrueVal,
              m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
                                                     m_Value(Ctlz)))))) ||
-      !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
+      !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Value())) ||
       !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
                                    ShouldDropNoWrap))
     return nullptr;
@@ -3565,6 +3565,8 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
 
   // Drop range attributes and re-infer them in the next iteration.
   cast<Instruction>(Ctlz)->dropPoisonGeneratingAnnotations();
+  // Set is_zero_poison to false and re-infer them in the next iteration.
+  cast<Instruction>(Ctlz)->setOperand(1, Builder.getFalse());
   IC.addToWorklist(cast<Instruction>(Ctlz));
   Value *Neg = Builder.CreateNeg(Ctlz);
   Value *Masked =
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 50c92529f048717..09f90ee05735d63 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -360,13 +360,12 @@ entry:
 define i32 @bit_ceil_plus_nuw(i32 %x) {
 ; CHECK-LABEL: @bit_ceil_plus_nuw(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB:%.*]] = add nuw i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 true)
-; CHECK-NEXT:    [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT:    [[SUB2:%.*]] = and i32 [[TMP0]], 31
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB2]]
-; CHECK-NEXT:    [[ULT:%.*]] = icmp ult i32 [[X]], 2147483647
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1
-; CHECK-NEXT:    ret i32 [[SEL]]
+; CHECK-NEXT:    ret i32 [[SHL]]
 ;
 entry:
   %sub = add nuw i32 %x, 1



More information about the llvm-commits mailing list