[llvm] 922ab66 - [InstCombine] Drop nowrap flags in `foldBitCeil` (#125817)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 5 00:49:42 PST 2025
Author: Yingwei Zheng
Date: 2025-02-05T16:49:39+08:00
New Revision: 922ab6650d7a01d2d44a10161529a3d576324037
URL: https://github.com/llvm/llvm-project/commit/922ab6650d7a01d2d44a10161529a3d576324037
DIFF: https://github.com/llvm/llvm-project/commit/922ab6650d7a01d2d44a10161529a3d576324037.diff
LOG: [InstCombine] Drop nowrap flags in `foldBitCeil` (#125817)
For convenience this patch drops nsw for `sub`. It also allows this fold
with `ctlz_zero_undef`.
Alive2: https://alive2.llvm.org/ce/z/VmvqSt
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/bit_ceil.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 382078e85a17b6d..2e14145aef88411 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3432,7 +3432,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
const APInt *Cond1, Value *CtlzOp,
unsigned BitWidth,
- bool &ShouldDropNUW) {
+ bool &ShouldDropNoWrap) {
// The challenge in recognizing std::bit_ceil(X) is that the operand is used
// for the CTLZ proper and select condition, each possibly with some
// operation like add and sub.
@@ -3455,7 +3455,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
ConstantRange CR = ConstantRange::makeExactICmpRegion(
CmpInst::getInversePredicate(Pred), *Cond1);
- ShouldDropNUW = false;
+ ShouldDropNoWrap = false;
// Match the operation that's used to compute CtlzOp from CommonAncestor. If
// CtlzOp == CommonAncestor, return true as no operation is needed. If a
@@ -3466,11 +3466,12 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
if (CtlzOp == CommonAncestor)
return true;
if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {
+ ShouldDropNoWrap = true;
CR = CR.add(*C);
return true;
}
if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
- ShouldDropNUW = true;
+ ShouldDropNoWrap = true;
CR = ConstantRange(*C).sub(CR);
return true;
}
@@ -3541,19 +3542,21 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
Pred = CmpInst::getInversePredicate(Pred);
}
- bool ShouldDropNUW;
+ bool ShouldDropNoWrap;
if (!match(FalseVal, m_One()) ||
!match(TrueVal,
m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
m_Value(Ctlz)))))) ||
- !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
+ !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Value())) ||
!isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
- ShouldDropNUW))
+ ShouldDropNoWrap))
return nullptr;
- if (ShouldDropNUW)
+ if (ShouldDropNoWrap) {
cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+ cast<Instruction>(CtlzOp)->setHasNoSignedWrap(false);
+ }
// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
@@ -3562,6 +3565,8 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
// Drop range attributes and re-infer them in the next iteration.
cast<Instruction>(Ctlz)->dropPoisonGeneratingAnnotations();
+ // Set is_zero_poison to false and re-infer them in the next iteration.
+ cast<Instruction>(Ctlz)->setOperand(1, Builder.getFalse());
IC.addToWorklist(cast<Instruction>(Ctlz));
Value *Neg = Builder.CreateNeg(Ctlz);
Value *Masked =
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index a2e27dfd6f64d36..09f90ee05735d63 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -302,9 +302,9 @@ define i32 @pr91691(i32 %0) {
ret i32 %7
}
-define i32 @pr91691_keep_nsw(i32 %0) {
-; CHECK-LABEL: @pr91691_keep_nsw(
-; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+define i32 @pr91691_drop_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_drop_nsw(
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
@@ -337,6 +337,46 @@ define i32 @test_drop_range_attr(i32 %x) {
ret i32 %sel
}
+define i32 @bit_ceil_plus_nsw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nsw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31
+; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+entry:
+ %sub = add nsw i32 %x, 1
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+ %sub2 = sub nuw nsw i32 32, %ctlz
+ %shl = shl nuw i32 1, %sub2
+ %ult = icmp ult i32 %x, 2147483647
+ %sel = select i1 %ult, i32 %shl, i32 1
+ ret i32 %sel
+}
+
+define i32 @bit_ceil_plus_nuw(i32 %x) {
+; CHECK-LABEL: @bit_ceil_plus_nuw(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1
+; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
+; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
+; CHECK-NEXT: [[SUB2:%.*]] = and i32 [[TMP0]], 31
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]]
+; CHECK-NEXT: ret i32 [[SHL]]
+;
+entry:
+ %sub = add nuw i32 %x, 1
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
+ %sub2 = sub nuw nsw i32 32, %ctlz
+ %shl = shl nuw i32 1, %sub2
+ %ult = icmp ult i32 %x, 2147483647
+ %sel = select i1 %ult, i32 %shl, i32 1
+ ret i32 %sel
+}
+
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
More information about the llvm-commits
mailing list