[llvm] [InstCombine] Drop exact flag instead of increasing demanded bits (PR #70311)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 26 02:23:53 PDT 2023


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/70311

Demanded bit simplification for lshr/ashr will currently demand the low bits if the exact flag is set. This is because these bits must be zero to satisfy the flag.

However, this means that our demanded bits simplification is worse for lshr/ashr exact than it is for plain lshr/ashr, which is generally not desirable.

Instead, drop the exact flag if a demanded bits simplification of the operand succeeds, which may no longer satisfy the exact flag.

This matches what we do for the exact flag on udiv, as well as the nuw/nsw flags on add/sub/mul.

>From 2f52f879bf73290429841bd216e87789568e9821 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 26 Oct 2023 11:18:51 +0200
Subject: [PATCH] [InstCombine] Drop exact flag instead of increasing demanded
 bits

Demanded bit simplification for lshr/ashr will currently demand
the low bits if the exact flag is set. This is because these bits
must be zero to satisfy the flag.

However, this means that our demanded bits simplification is worse
for lshr/ashr exact than it is for plain lshr/ashr, which is
generally not desirable.

Instead, drop the exact flag if a demanded bits simplification of
the operand succeeds, which may no longer satisfy the exact flag.

This matches what we do for the exact flag on udiv, as well as
the nuw/nsw flags on add/sub/mul.
---
 .../InstCombineSimplifyDemanded.cpp           | 21 +++++++------------
 llvm/test/Transforms/InstCombine/cast.ll      |  4 ++--
 llvm/test/Transforms/InstCombine/select-2.ll  |  2 +-
 llvm/test/Transforms/InstCombine/shift.ll     |  8 +++----
 4 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index be005e61a8d2d89..cd6b017874e8d6c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -698,14 +698,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
 
       // Unsigned shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
-
-      // If the shift is exact, then it does demand the low bits (and knows that
-      // they are zero).
-      if (cast<LShrOperator>(I)->isExact())
-        DemandedMaskIn.setLowBits(ShiftAmt);
-
-      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+        // exact flag may no longer hold.
+        I->dropPoisonGeneratingFlags();
         return I;
+      }
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       Known.Zero.lshrInPlace(ShiftAmt);
       Known.One.lshrInPlace(ShiftAmt);
@@ -747,13 +744,11 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       if (DemandedMask.countl_zero() <= ShiftAmt)
         DemandedMaskIn.setSignBit();
 
-      // If the shift is exact, then it does demand the low bits (and knows that
-      // they are zero).
-      if (cast<AShrOperator>(I)->isExact())
-        DemandedMaskIn.setLowBits(ShiftAmt);
-
-      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
+      if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) {
+        // exact flag may no longer hold.
+        I->dropPoisonGeneratingFlags();
         return I;
+      }
 
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       // Compute the new bits that are at the top now plus sign bits.
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 5b480b1157936eb..59e488f3f23d52a 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -1318,7 +1318,7 @@ define i64 @test83(i16 %a, i64 %k) {
 define i8 @test84(i32 %a) {
 ; ALL-LABEL: @test84(
 ; ALL-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432
-; ALL-NEXT:    [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], 23
 ; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
 ; ALL-NEXT:    ret i8 [[TRUNC]]
 ;
@@ -1331,7 +1331,7 @@ define i8 @test84(i32 %a) {
 define i8 @test85(i32 %a) {
 ; ALL-LABEL: @test85(
 ; ALL-NEXT:    [[ADD:%.*]] = add i32 [[A:%.*]], 2130706432
-; ALL-NEXT:    [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; ALL-NEXT:    [[SHR:%.*]] = lshr i32 [[ADD]], 23
 ; ALL-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
 ; ALL-NEXT:    ret i8 [[TRUNC]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/select-2.ll b/llvm/test/Transforms/InstCombine/select-2.ll
index 2e4161f5d80aaaa..148b0dcf1025980 100644
--- a/llvm/test/Transforms/InstCombine/select-2.ll
+++ b/llvm/test/Transforms/InstCombine/select-2.ll
@@ -45,7 +45,7 @@ define float @t3(float %x, float %y) {
 
 define i8 @ashr_exact_poison_constant_fold(i1 %b, i8 %x) {
 ; CHECK-LABEL: @ashr_exact_poison_constant_fold(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i8 [[X:%.*]], 3
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], i8 [[TMP1]], i8 5
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 7b9626331ff2907..913ef2a74aebd9a 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -2141,9 +2141,8 @@ define i16 @lshr_and_not_demanded(i8 %x) {
 
 define i16 @lshr_exact_and_not_demanded(i8 %x) {
 ; CHECK-LABEL: @lshr_exact_and_not_demanded(
-; CHECK-NEXT:    [[Y:%.*]] = and i8 [[X:%.*]], -2
-; CHECK-NEXT:    [[Y_EXT:%.*]] = sext i8 [[Y]] to i16
-; CHECK-NEXT:    [[SHR:%.*]] = lshr exact i16 [[Y_EXT]], 1
+; CHECK-NEXT:    [[Y_EXT:%.*]] = sext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[Y_EXT]], 1
 ; CHECK-NEXT:    ret i16 [[SHR]]
 ;
   %y = and i8 %x, -2
@@ -2177,8 +2176,7 @@ define i16 @ashr_umax_not_demanded(i16 %x) {
 
 define i16 @ashr_exact_umax_not_demanded(i16 %x) {
 ; CHECK-LABEL: @ashr_exact_umax_not_demanded(
-; CHECK-NEXT:    [[Y:%.*]] = call i16 @llvm.umax.i16(i16 [[X:%.*]], i16 1)
-; CHECK-NEXT:    [[SHR:%.*]] = ashr exact i16 [[Y]], 1
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i16 [[X:%.*]], 1
 ; CHECK-NEXT:    ret i16 [[SHR]]
 ;
   %y = call i16 @llvm.umax.i16(i16 %x, i16 1)



More information about the llvm-commits mailing list