[llvm] [InstCombine] Fold mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N)) (PR #95042)

Mon Jun 10 14:12:08 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

<details>
<summary>Changes</summary>

Alive2 Proof:
https://alive2.llvm.org/ce/z/LVqGEo

---
Full diff: https://github.com/llvm/llvm-project/pull/95042.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (+36) 
- (modified) llvm/test/Transforms/InstCombine/ashr-lshr.ll (+130) 


``````````diff

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ca1b1921404d8..9248c902c1e90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -255,6 +255,42 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     }
   }
 
+  {
+    // mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N))
+    Value *NewOp;
+    const APInt *ShiftC;
+    const APInt *MulAP;
+    if (match(&I, m_Mul(m_CombineOr(m_LShr(m_Value(NewOp), m_APInt(ShiftC)),
+                                    m_AShr(m_Value(NewOp), m_APInt(ShiftC))),
+                        m_APInt(MulAP)))) {
+      if (BitWidth > 2 && (*MulAP - 1).isPowerOf2() &&
+          MulAP->logBase2() == ShiftC->getZExtValue()) {
+        BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
+        if (OpBO->isExact()) {
+          Value *AddOp;
+          if (!HasNUW && !HasNUW)
+            AddOp = Builder.CreateFreeze(NewOp);
+          else
+            AddOp = NewOp;
+
+          Value *BinOp;
+          if (OpBO->getOpcode() == Instruction::LShr ||
+              (OpBO->getOpcode() == Instruction::AShr && HasNUW)) {
+            BinOp = Builder.CreateLShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          } else {
+            BinOp = Builder.CreateAShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          }
+
+          auto *NewAdd = BinaryOperator::CreateAdd(AddOp, BinOp);
+          NewAdd->copyIRFlags(&I);
+          return NewAdd;
+        }
+      }
+    }
+  }
+
   if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
     // Interpret  X * (-1<<C)  as  (-X) * (1<<C)  and try to sink the negation.
     // The "* (1<<C)" thus becomes a potential shifting opportunity.
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..3309b18e0f11a 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -862,4 +862,134 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
   ret i32 %ashr
 }
 
+define i32 @ashr_shift_mul(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_no_exact(i32 %x) {
+; CHECK-LABEL: @lshr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_no_exact(i32 %x) {
+; CHECK-LABEL: @ashr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_multiuse(i32 %x) {
+; CHECK-LABEL: @lshr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_multiuse(i32 %x) {
+; CHECK-LABEL: @ashr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
 declare void @use(i32)

``````````

</details>


https://github.com/llvm/llvm-project/pull/95042