[llvm] [InstCombine] Drop nuw flag when CtlzOp is a sub nuw (PR #91776)

Fri May 10 10:40:58 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

<details>
<summary>Changes</summary>

See the following case:
```
define i32 @src1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %shl = shl i32 1, %sub
  %ugt = icmp ult i32 %x, -2
  %sel = select i1 %ugt, i32 %shl, i32 1
  ret i32 %sel
}

define i32 @tgt1(i32 %x) {
  %dec = sub nuw i32 -2, %x
  %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  %sub = sub nsw i32 32, %ctlz
  %and = and i32 %sub, 31
  %shl = shl nuw i32 1, %and
  ret i32 %shl
}
```
`nuw` in `%dec` should be dropped after the select instruction is eliminated.

Alive2: https://alive2.llvm.org/ce/z/7S9529

Fixes https://github.com/llvm/llvm-project/issues/91691.

BTW, I find that `foldBitCeil` never works in practice: https://dtcxzyw.github.io/llvm-opt-benchmark/coverage/home/dtcxzyw/llvm-project/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp.html#L3433
Clang emits phi instead of select for `std::bit_ceil`: https://godbolt.org/z/54YWfbq95


---
Full diff: https://github.com/llvm/llvm-project/pull/91776.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+12-2) 
- (modified) llvm/test/Transforms/InstCombine/bit_ceil.ll (+36) 


``````````diff

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8818369e79452..9a565481d9678 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3342,7 +3342,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
 // pattern.
 static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                         const APInt *Cond1, Value *CtlzOp,
-                                        unsigned BitWidth) {
+                                        unsigned BitWidth,
+                                        bool &ShouldDropNUW) {
   // The challenge in recognizing std::bit_ceil(X) is that the operand is used
   // for the CTLZ proper and select condition, each possibly with some
   // operation like add and sub.
@@ -3365,6 +3366,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
   ConstantRange CR = ConstantRange::makeExactICmpRegion(
       CmpInst::getInversePredicate(Pred), *Cond1);
 
+  ShouldDropNUW = false;
+
   // Match the operation that's used to compute CtlzOp from CommonAncestor.  If
   // CtlzOp == CommonAncestor, return true as no operation is needed.  If a
   // match is found, execute the operation on CR, update CR, and return true.
@@ -3378,6 +3381,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
       return true;
     }
     if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+      ShouldDropNUW = true;
       CR = ConstantRange(*C).sub(CR);
       return true;
     }
@@ -3447,14 +3451,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
     Pred = CmpInst::getInversePredicate(Pred);
   }
 
+  bool ShouldDropNUW;
+
   if (!match(FalseVal, m_One()) ||
       !match(TrueVal,
              m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
                                                     m_Value(Ctlz)))))) ||
       !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
-      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
+                                   ShouldDropNUW))
     return nullptr;
 
+  if (ShouldDropNUW)
+    cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+
   // Build 1 << (-CTLZ & (BitWidth-1)).  The negation likely corresponds to a
   // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
   // is an integer constant.  Masking with BitWidth-1 comes free on some
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 16631afa4878d..79665be01576a 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
   ret <4 x i32> %sel
 }
 
+define i32 @pr91691(i32 %0) {
+; CHECK-LABEL: @pr91691(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = sub nuw i32 -2, %0
+  %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+  %4 = sub i32 32, %3
+  %5 = shl i32 1, %4
+  %6 = icmp ult i32 %0, -2
+  %7 = select i1 %6, i32 %5, i32 1
+  ret i32 %7
+}
+
+define i32 @pr91691_keep_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_keep_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = sub nsw i32 -2, %0
+  %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+  %4 = sub i32 32, %3
+  %5 = shl i32 1, %4
+  %6 = icmp ult i32 %0, -2
+  %7 = select i1 %6, i32 %5, i32 1
+  ret i32 %7
+}
+
 declare i32 @llvm.ctlz.i32(i32, i1 immarg)
 declare i64 @llvm.ctlz.i64(i64, i1 immarg)
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/91776