[llvm] [InstCombine] Propagate exact flags in shift-combine transforms (PR #88340)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 8 20:37:42 PDT 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/88340

From 64756e01167cb16fe846e71d124bc117ca86e69d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Wed, 10 Apr 2024 22:38:40 -0400
Subject: [PATCH] [InstCombine] Propagate exact flags in shift-combine
 transforms

There are several shift-combine transforms where InstCombine could propagate
the exact flag (and, in two cases, the nuw/nsw flags) to the instructions it
creates but did not. This patch addresses some of those transforms in
visitLShr and visitAShr.
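
For example, in the new test29_exact case in shift.ll, two exact lshr
instructions separated by a trunc now fold to a single lshr that keeps
the flag:

  %i916 = lshr exact i64 %d18, 32
  %i917 = trunc i64 %i916 to i32
  %i10 = lshr exact i32 %i917, 31

  ; becomes

  %sum.shift = lshr exact i64 %d18, 63
  %i101 = trunc nuw nsw i64 %sum.shift to i32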

Alive2 proofs:
https://alive2.llvm.org/ce/z/vmoZrX
https://alive2.llvm.org/ce/z/9zxKKA
https://alive2.llvm.org/ce/z/HJebVu
https://alive2.llvm.org/ce/z/96ez9n
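
As a hand-written sketch of the visitAShr change (this exact sequence is
not among the added tests), the combined shift is now marked exact when
both of the original shifts were:

  %a = ashr exact i8 %x, 3
  %r = ashr exact i8 %a, 2

  ; (X >>s C1) >>s C2 --> X >>s (C1 + C2)

  %r = ashr exact i8 %x, 5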

---
 .../InstCombine/InstCombineShifts.cpp         | 24 ++++---
 llvm/test/Transforms/InstCombine/cast.ll      | 14 ++--
 llvm/test/Transforms/InstCombine/lshr.ll      | 28 ++++++++
 llvm/test/Transforms/InstCombine/shift.ll     | 70 +++++++++++++++++++
 4 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 4a014ab6e044e..8d378f1f709f8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1394,8 +1394,11 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
     if (match(Op0,
               m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                                m_Value(Y))))) {
-      Value *NewLshr = Builder.CreateLShr(Y, Op1);
-      Value *NewAdd = Builder.CreateAdd(NewLshr, X);
+      Value *NewLshr = Builder.CreateLShr(Y, Op1, "", I.isExact());
+      Value *NewAdd = Builder.CreateAdd(
+          NewLshr, X, "",
+          cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap(),
+          cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap());
       unsigned Op1Val = C->getLimitedValue(BitWidth);
       APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val);
       Constant *Mask = ConstantInt::get(Ty, Bits);
@@ -1407,7 +1410,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
       assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
              "Big shift not simplified to zero?");
       // lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
-      Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
+      Value *NewLShr = Builder.CreateLShr(X, ShAmtC, "", I.isExact());
       return new ZExtInst(NewLShr, Ty);
     }
 
@@ -1433,7 +1436,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
         if (ShAmtC == BitWidth - SrcTyBitWidth) {
           // The new shift amount can't be more than the narrow source type.
           unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
-          Value *AShr = Builder.CreateAShr(X, NewShAmt);
+          Value *AShr = Builder.CreateAShr(X, NewShAmt, "", I.isExact());
           return new ZExtInst(AShr, Ty);
         }
       }
@@ -1469,9 +1472,11 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
       // mask instruction is eliminated (and so the use check is relaxed).
       if (AmtSum < SrcWidth &&
           (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
-        Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
-        Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());
-
+        Value *SumShift = Builder.CreateLShr(
+            X, AmtSum, "sum.shift", TruncSrc->isExact() && I.isExact());
+        Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName(),
+                                           TruncSrc->hasNoUnsignedWrap(),
+                                           TruncSrc->hasNoSignedWrap());
         // If the first shift does not cover the number of bits truncated, then
         // we require a mask to get rid of high bits in the result.
         APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
@@ -1709,7 +1714,10 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
       // Oversized arithmetic shifts replicate the sign bit.
       AmtSum = std::min(AmtSum, BitWidth - 1);
       // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
-      return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+      Instruction *NewAshr =
+          BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
+      NewAshr->setIsExact(I.isExact() && cast<BinaryOperator>(Op0)->isExact());
+      return NewAshr;
     }
 
     if (match(Op0, m_OneUse(m_SExt(m_Value(X)))) &&
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 6cee7bb650fd3..6abf97a7097fa 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -512,8 +512,8 @@ define <2 x i16> @test40vec_poison(<2 x i16> %a) {
 ; ALL-LABEL: @test40vec_poison(
 ; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 9, i16 poison>
 ; ALL-NEXT:    [[T5:%.*]] = shl <2 x i16> [[A]], <i16 8, i16 poison>
-; ALL-NEXT:    [[R:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]]
-; ALL-NEXT:    ret <2 x i16> [[R]]
+; ALL-NEXT:    [[T32:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]]
+; ALL-NEXT:    ret <2 x i16> [[T32]]
 ;
   %t = zext <2 x i16> %a to <2 x i32>
   %t21 = lshr <2 x i32> %t, <i32 9, i32 poison>
@@ -1993,7 +1993,7 @@ define i8 @trunc_lshr_zext(i8 %A) {
 
 define i8 @trunc_lshr_zext_exact(i8 %A) {
 ; ALL-LABEL: @trunc_lshr_zext_exact(
-; ALL-NEXT:    [[TMP1:%.*]] = lshr i8 [[A:%.*]], 6
+; ALL-NEXT:    [[TMP1:%.*]] = lshr exact i8 [[A:%.*]], 6
 ; ALL-NEXT:    ret i8 [[TMP1]]
 ;
   %B = zext i8 %A to i32
@@ -2015,8 +2015,8 @@ define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) {
 
 define <2 x i8> @trunc_lshr_zext_uniform_poison(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_zext_uniform_poison(
-; ALL-NEXT:    [[D:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 poison>
-; ALL-NEXT:    ret <2 x i8> [[D]]
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 poison>
+; ALL-NEXT:    ret <2 x i8> [[C]]
 ;
   %B = zext <2 x i8> %A to <2 x i32>
   %C = lshr <2 x i32> %B, <i32 6, i32 poison>
@@ -2037,8 +2037,8 @@ define <2 x i8> @trunc_lshr_zext_nonuniform(<2 x i8> %A) {
 
 define <3 x i8> @trunc_lshr_zext_nonuniform_poison(<3 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_zext_nonuniform_poison(
-; ALL-NEXT:    [[D:%.*]] = lshr <3 x i8> [[A:%.*]], <i8 6, i8 2, i8 poison>
-; ALL-NEXT:    ret <3 x i8> [[D]]
+; ALL-NEXT:    [[C:%.*]] = lshr <3 x i8> [[A:%.*]], <i8 6, i8 2, i8 poison>
+; ALL-NEXT:    ret <3 x i8> [[C]]
 ;
   %B = zext <3 x i8> %A to <3 x i32>
   %C = lshr <3 x i32> %B, <i32 6, i32 2, i32 poison>
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 60f03733ebd99..a78d493c897d1 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -202,6 +202,34 @@ define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) {
   ret <2 x i8> %r
 }
 
+define i8 @shl_add2(i8 %x, i8 %y) {
+; CHECK-LABEL: @shl_add2(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[TMP2]], 63
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %l = shl i8 %x, 2
+  %a = add i8 %l, %y
+  %r = lshr exact i8 %a, 2
+  ret i8 %r
+}
+
+define <2 x i8> @shl_add_commute_vec2(<2 x i8> %x, <2 x i8> %py) {
+; CHECK-LABEL: @shl_add_commute_vec2(
+; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact <2 x i8> [[Y]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP2]], <i8 31, i8 31>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization
+  %l = shl <2 x i8> %x, <i8 3, i8 3>
+  %a = add <2 x i8> %y, %l
+  %r = lshr exact <2 x i8> %a, <i8 3, i8 3>
+  ret <2 x i8> %r
+}
+
 define i32 @shl_add_use1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shl_add_use1(
 ; CHECK-NEXT:    [[L:%.*]] = shl i32 [[X:%.*]], 2
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 0700d7a62ee15..113c43d3c2ff9 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -433,6 +433,48 @@ entry:
   ret i32 %i10
 }
 
+define i32 @test29_exact(i64 %d18) {
+; CHECK-LABEL: @test29_exact(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr exact i64 [[D18:%.*]], 63
+; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw i64 [[SUM_SHIFT]] to i32
+; CHECK-NEXT:    ret i32 [[I101]]
+;
+entry:
+  %i916 = lshr exact i64 %d18, 32
+  %i917 = trunc i64 %i916 to i32
+  %i10 = lshr exact i32 %i917, 31
+  ret i32 %i10
+}
+
+define i32 @test29_exact_fail(i64 %d18) {
+; CHECK-LABEL: @test29_exact_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr i64 [[D18:%.*]], 63
+; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw i64 [[SUM_SHIFT]] to i32
+; CHECK-NEXT:    ret i32 [[I101]]
+;
+entry:
+  %i916 = lshr exact i64 %d18, 32
+  %i917 = trunc i64 %i916 to i32
+  %i10 = lshr i32 %i917, 31
+  ret i32 %i10
+}
+
+define i32 @test29_exact_fail2(i64 %d18) {
+; CHECK-LABEL: @test29_exact_fail2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr i64 [[D18:%.*]], 63
+; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw i64 [[SUM_SHIFT]] to i32
+; CHECK-NEXT:    ret i32 [[I101]]
+;
+entry:
+  %i916 = lshr i64 %d18, 32
+  %i917 = trunc i64 %i916 to i32
+  %i10 = lshr exact i32 %i917, 31
+  ret i32 %i10
+}
+
 define <2 x i32> @test29_uniform(<2 x i64> %d18) {
 ; CHECK-LABEL: @test29_uniform(
 ; CHECK-NEXT:  entry:
@@ -447,6 +489,34 @@ entry:
   ret <2 x i32> %i10
 }
 
+define <2 x i32> @test29_uniform_exact(<2 x i64> %d18) {
+; CHECK-LABEL: @test29_uniform_exact(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr exact <2 x i64> [[D18:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[I101]]
+;
+entry:
+  %i916 = lshr exact <2 x i64> %d18, <i64 32, i64 32>
+  %i917 = trunc <2 x i64> %i916 to <2 x i32>
+  %i10 = lshr exact <2 x i32> %i917, <i32 31, i32 31>
+  ret <2 x i32> %i10
+}
+
+define <2 x i32> @test29_uniform_exact_fail(<2 x i64> %d18) {
+; CHECK-LABEL: @test29_uniform_exact_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[I101]]
+;
+entry:
+  %i916 = lshr <2 x i64> %d18, <i64 32, i64 32>
+  %i917 = trunc <2 x i64> %i916 to <2 x i32>
+  %i10 = lshr exact <2 x i32> %i917, <i32 31, i32 31>
+  ret <2 x i32> %i10
+}
+
 define <2 x i32> @test29_nonuniform(<2 x i64> %d18) {
 ; CHECK-LABEL: @test29_nonuniform(
 ; CHECK-NEXT:  entry:
