[llvm-branch-commits] [llvm] release/18.x: [InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776) (PR #91917)
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed May 15 16:30:22 PDT 2024
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/91917
>From e4bbaa67ac5bb4aa0c6a4699aaa164f58ab5ae61 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Mon, 13 May 2024 14:27:59 +0800
Subject: [PATCH 1/2] [InstCombine] Drop nuw flag when CtlzOp is a sub nuw
(#91776)
See the following case:
```
define i32 @src1(i32 %x) {
%dec = sub nuw i32 -2, %x
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
%sub = sub nsw i32 32, %ctlz
%shl = shl i32 1, %sub
%ugt = icmp ult i32 %x, -2
%sel = select i1 %ugt, i32 %shl, i32 1
ret i32 %sel
}
define i32 @tgt1(i32 %x) {
%dec = sub nuw i32 -2, %x
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
%sub = sub nsw i32 32, %ctlz
%and = and i32 %sub, 31
%shl = shl nuw i32 1, %and
ret i32 %shl
}
```
`nuw` in `%dec` should be dropped after the select instruction is
eliminated.
Alive2: https://alive2.llvm.org/ce/z/7S9529
Fixes https://github.com/llvm/llvm-project/issues/91691.
(cherry picked from commit b5f4210e9f51f938ae517f219f04f9ab431a2684)
---
.../InstCombine/InstCombineSelect.cpp | 14 ++++++--
llvm/test/Transforms/InstCombine/bit_ceil.ll | 36 +++++++++++++++++++
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8cc7901cbac7f..86a39cf2ee93f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3201,7 +3201,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
// pattern.
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
const APInt *Cond1, Value *CtlzOp,
- unsigned BitWidth) {
+ unsigned BitWidth,
+ bool &ShouldDropNUW) {
// The challenge in recognizing std::bit_ceil(X) is that the operand is used
// for the CTLZ proper and select condition, each possibly with some
// operation like add and sub.
@@ -3224,6 +3225,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
ConstantRange CR = ConstantRange::makeExactICmpRegion(
CmpInst::getInversePredicate(Pred), *Cond1);
+ ShouldDropNUW = false;
+
// Match the operation that's used to compute CtlzOp from CommonAncestor. If
// CtlzOp == CommonAncestor, return true as no operation is needed. If a
// match is found, execute the operation on CR, update CR, and return true.
@@ -3237,6 +3240,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
return true;
}
if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+ ShouldDropNUW = true;
CR = ConstantRange(*C).sub(CR);
return true;
}
@@ -3306,14 +3310,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
Pred = CmpInst::getInversePredicate(Pred);
}
+ bool ShouldDropNUW;
+
if (!match(FalseVal, m_One()) ||
!match(TrueVal,
m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
m_Value(Ctlz)))))) ||
!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
- !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+ !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
+ ShouldDropNUW))
return nullptr;
+ if (ShouldDropNUW)
+ cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+
// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
// is an integer constant. Masking with BitWidth-1 comes free on some
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 52e70c78ba542..63a5ae012eeb6 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
ret <4 x i32> %sel
}
+define i32 @pr91691(i32 %0) {
+; CHECK-LABEL: @pr91691(
+; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = sub nuw i32 -2, %0
+ %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+ %4 = sub i32 32, %3
+ %5 = shl i32 1, %4
+ %6 = icmp ult i32 %0, -2
+ %7 = select i1 %6, i32 %5, i32 1
+ ret i32 %7
+}
+
+define i32 @pr91691_keep_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_keep_nsw(
+; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+ %2 = sub nsw i32 -2, %0
+ %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+ %4 = sub i32 32, %3
+ %5 = shl i32 1, %4
+ %6 = icmp ult i32 %0, -2
+ %7 = select i1 %6, i32 %5, i32 1
+ ret i32 %7
+}
+
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
>From 832b3b058d7d2a99e996f41725dbcb05b09145b8 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar at redhat.com>
Date: Wed, 15 May 2024 16:30:15 -0700
Subject: [PATCH 2/2] Update llvm/test/Transforms/InstCombine/bit_ceil.ll
Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
llvm/test/Transforms/InstCombine/bit_ceil.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 63a5ae012eeb6..2c459a8c9d6db 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -287,7 +287,7 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
define i32 @pr91691(i32 %0) {
; CHECK-LABEL: @pr91691(
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false), !range [[RNG0]]
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
@@ -305,7 +305,7 @@ define i32 @pr91691(i32 %0) {
define i32 @pr91691_keep_nsw(i32 %0) {
; CHECK-LABEL: @pr91691_keep_nsw(
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
-; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false), !range [[RNG0]]
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
More information about the llvm-branch-commits
mailing list