[llvm] [InstCombine] Propagate exact flags in transformation (PR #88340)

Wed Apr 10 19:42:05 PDT 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/88340

>From 7238109a6dbc5b4fd8a48adf76cf0cedcc61c508 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Wed, 10 Apr 2024 22:38:40 -0400
Subject: [PATCH 1/2] [InstCombine] Pre-commit tests (NFC)

---
 llvm/test/Transforms/InstCombine/lshr.ll | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 02c2bbc2819b8c..29cb605f89d553 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -191,6 +191,34 @@ define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) {
   ret <2 x i8> %r
 }
 
+define i8 @shl_add2(i8 %x, i8 %y) {
+; CHECK-LABEL: @shl_add2(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[TMP2]], 63
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %l = shl i8 %x, 2
+  %a = add i8 %l, %y
+  %r = lshr exact i8 %a, 2
+  ret i8 %r
+}
+
+define <2 x i8> @shl_add_commute_vec2(<2 x i8> %x, <2 x i8> %py) {
+; CHECK-LABEL: @shl_add_commute_vec2(
+; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[Y]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP2]], <i8 31, i8 31>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization
+  %l = shl <2 x i8> %x, <i8 3, i8 3>
+  %a = add <2 x i8> %y, %l
+  %r = lshr exact <2 x i8> %a, <i8 3, i8 3>
+  ret <2 x i8> %r
+}
+
 define i32 @shl_add_use1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shl_add_use1(
 ; CHECK-NEXT:    [[L:%.*]] = shl i32 [[X:%.*]], 2

>From 75b0cabf37c73b3a6e7179de6a491426e60f3dbc Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Wed, 10 Apr 2024 21:56:13 -0400
Subject: [PATCH 2/2] [InstCombine] Propagate exact flags in transformation

---
 .../InstCombine/InstCombineShifts.cpp           | 17 ++++++++++++-----
 llvm/test/Transforms/InstCombine/lshr.ll        |  4 ++--
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 95aa2119e2d88b..4c3f8b474745fd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1332,7 +1332,7 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
     if (match(Op0,
               m_OneUse(m_c_Add(m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))),
                                m_Value(Y))))) {
-      Value *NewLshr = Builder.CreateLShr(Y, Op1);
+      Value *NewLshr = Builder.CreateLShr(Y, Op1, "", I.isExact());
       Value *NewAdd = Builder.CreateAdd(NewLshr, X);
       unsigned Op1Val = C->getLimitedValue(BitWidth);
       APInt Bits = APInt::getLowBitsSet(BitWidth, BitWidth - Op1Val);
@@ -1395,11 +1395,17 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
     }
 
     // (X >>u C1) >>u C --> X >>u (C1 + C)
-    if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
+    Instruction *Inst;
+    if (match(Op0, m_Instruction(Inst)) &&
+        match(Inst, m_LShr(m_Value(X), m_APInt(C1)))) {
       // Oversized shifts are simplified to zero in InstSimplify.
       unsigned AmtSum = ShAmtC + C1->getZExtValue();
-      if (AmtSum < BitWidth)
-        return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+      if (AmtSum < BitWidth) {
+        auto *NewLShr =
+            BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
+        NewLShr->setIsExact(I.isExact() && Inst->isExact());
+        return NewLShr;
+      }
     }
 
     Instruction *TruncSrc;
@@ -1415,7 +1421,8 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
       // mask instruction is eliminated (and so the use check is relaxed).
       if (AmtSum < SrcWidth &&
           (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
-        Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
+        Value *SumShift = Builder.CreateLShr(
+            X, AmtSum, "sum.shift", TruncSrc->isExact() && I.isExact());
         Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());
 
         // If the first shift does not cover the number of bits truncated, then
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 29cb605f89d553..399961869103fc 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -193,7 +193,7 @@ define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) {
 
 define i8 @shl_add2(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_add2(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[Y:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i8 [[Y:%.*]], 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = and i8 [[TMP2]], 63
 ; CHECK-NEXT:    ret i8 [[R]]
@@ -207,7 +207,7 @@ define i8 @shl_add2(i8 %x, i8 %y) {
 define <2 x i8> @shl_add_commute_vec2(<2 x i8> %x, <2 x i8> %py) {
 ; CHECK-LABEL: @shl_add_commute_vec2(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[Y]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact <2 x i8> [[Y]], <i8 3, i8 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP2]], <i8 31, i8 31>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]