[llvm] [InstCombine] Reduce multiplicands of even numbers when a shift is involved (PR #92475)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 16 17:31:07 PDT 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/92475
>From 7d6fb577b378a839d14a26c3a9c0caf818071b76 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 16 May 2024 14:11:44 -0400
Subject: [PATCH 1/2] [InstCombine] Pre-commit tests (NFC)
---
llvm/test/Transforms/InstCombine/ashr-lshr.ll | 48 +++++++++++++++++++
llvm/test/Transforms/InstCombine/lshr.ll | 48 +++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index ac206dc7999dd..fd5517b2d44b2 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -604,3 +604,51 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
%r = ashr exact <2 x i8> %p, %y
ret <2 x i8> %r
}
+
+define i32 @reduce_shift(i32 %x) {
+; CHECK-LABEL: @reduce_shift(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nsw i32 %x, 12
+ %shr = ashr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_no_nsw(i32 %x) {
+; CHECK-LABEL: @reduce_shift_no_nsw(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 12
+ %shr = ashr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_wrong_mul(i32 %x) {
+; CHECK-LABEL: @reduce_shift_wrong_mul(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nsw i32 %x, 11
+ %shr = ashr i32 %mul, 4
+ ret i32 %shr
+}
+
+define i32 @reduce_shift_exact(i32 %x) {
+; CHECK-LABEL: @reduce_shift_exact(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = ashr exact i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nsw i32 %x, 12
+ %shr = ashr exact i32 %mul, 4
+ ret i32 %shr
+}
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index fa92c1c4b3be4..a8a21154d6798 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -1403,3 +1403,51 @@ define <2 x i8> @bool_add_lshr_vec_wrong_shift_amt(<2 x i1> %a, <2 x i1> %b) {
%lshr = lshr <2 x i8> %add, <i8 1, i8 2>
ret <2 x i8> %lshr
}
+
+define i32 @reduce_shift(i32 %x) {
+; CHECK-LABEL: @reduce_shift(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 12
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_no_nuw(i32 %x) {
+; CHECK-LABEL: @reduce_shift_no_nuw(
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nsw i32 %x, 12
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+; Negative test
+
+define i32 @reduce_shift_wrong_mul(i32 %x) {
+; CHECK-LABEL: @reduce_shift_wrong_mul(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 11
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 11
+ %shr = lshr i32 %mul, 4
+ ret i32 %shr
+}
+
+define i32 @reduce_shift_exact(i32 %x) {
+; CHECK-LABEL: @reduce_shift_exact(
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 12
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[MUL]], 4
+; CHECK-NEXT: ret i32 [[SHR]]
+;
+ %mul = mul nuw i32 %x, 12
+ %shr = lshr exact i32 %mul, 4
+ ret i32 %shr
+}
>From 200ff14b8ca675fe2850f495a5ea0c490ddcefdc Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 16 May 2024 13:54:17 -0400
Subject: [PATCH 2/2] [InstCombine] Reduce multiplicands of even numbers when a
shift is involved
We can improve analysis and codegen, and enable other folds, if we take expressions like (x * 6) >> 2 and replace them with (x * 3) >> 1 (assuming no overflow, of course).
Because each right shift by one is a division by 2, when the multiplicand is even we can halve it and drop one shift, and keep going until either the shift amount reaches 0 or the multiplicand becomes odd.
Alive2 Proofs:
https://alive2.llvm.org/ce/z/C9FvwB
https://alive2.llvm.org/ce/z/7Zsx3b
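As a rough illustration of the constant arithmetic behind the fold (this is not the actual InstCombine code; the helper name reduceMulShift and the plain uint32_t types are made up for this sketch), the reduction boils down to stripping the factors of two that the multiplier and the shift amount have in common:

#include <algorithm>
#include <bit>
#include <cstdint>
#include <iostream>
#include <utility>

// Illustrative only: given a multiplier constant and a shift amount, strip
// the factors of two they share, since
//   (x * MulC) >> ShAmt  ==  (x * (MulC >> K)) >> (ShAmt - K)
// with K = min(countr_zero(MulC), ShAmt), provided the mul carries the
// appropriate no-wrap flag (nuw for lshr, nsw for ashr). The flags
// themselves are not modeled here.
static std::pair<uint32_t, unsigned> reduceMulShift(uint32_t MulC,
                                                    unsigned ShAmt) {
  unsigned CommonZeros = std::min<unsigned>(std::countr_zero(MulC), ShAmt);
  return {MulC >> CommonZeros, ShAmt - CommonZeros};
}

int main() {
  auto [M1, S1] = reduceMulShift(6, 2);  // (x * 6) >> 2  ->  (x * 3) >> 1
  auto [M2, S2] = reduceMulShift(12, 4); // (x * 12) >> 4 ->  (x * 3) >> 2
  std::cout << M1 << ' ' << S1 << '\n';  // prints "3 1"
  std::cout << M2 << ' ' << S2 << '\n';  // prints "3 2"
}

When ShAmt - K ends up as 0 the shift disappears entirely, which is the case the existing fold already handled; the patch extends it to the partial case where only some of the trailing zeros can be cancelled.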
---
.../InstCombine/InstCombineShifts.cpp | 65 ++++++++++++++++---
llvm/test/Transforms/InstCombine/ashr-lshr.ll | 8 +--
llvm/test/Transforms/InstCombine/lshr.ll | 12 ++--
3 files changed, 65 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ba297111d945f..5b9a6ee24cb05 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1469,17 +1469,32 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
// able to invert the transform and perf may suffer with an extra mul
// instruction.
if (Op0->hasOneUse()) {
- APInt NewMulC = MulC->lshr(ShAmtC);
- // if c is divisible by (1 << ShAmtC):
- // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
- if (MulC->eq(NewMulC.shl(ShAmtC))) {
- auto *NewMul =
- BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
- assert(ShAmtC != 0 &&
- "lshr X, 0 should be handled by simplifyLShrInst.");
- NewMul->setHasNoSignedWrap(true);
- return NewMul;
+ unsigned CommonZeros = std::min(MulC->countr_zero(), ShAmtC);
+ if (CommonZeros != 0) {
+ APInt NewMulC = MulC->lshr(CommonZeros);
+ unsigned NewShAmtC = ShAmtC - CommonZeros;
+ // if c is divisible by (1 << ShAmtC):
+ // lshr (mul nuw x, MulC), ShAmtC -> mul nuw nsw x, (MulC >> ShAmtC)
+ if (NewShAmtC == 0) {
+ auto *NewMul =
+ BinaryOperator::CreateNUWMul(X, ConstantInt::get(Ty, NewMulC));
+ NewMul->setHasNoSignedWrap(true);
+ return NewMul;
+ }
+
+ // We can reduce things like lshr (mul nuw x, 6), 2 to lshr (mul nuw
+ // nsw x, 3), 1
+ // TODO: What about if ALL uses can be simplified in this way? Is that
+ // likely enough to happen to justify even caring?
+ auto *NewMul = Builder.CreateMul(X, ConstantInt::get(Ty, NewMulC), "",
+ /*NUW*/ true, /*NSW*/ true);
+ auto *NewLshr = BinaryOperator::CreateLShr(
+ NewMul, ConstantInt::get(Ty, NewShAmtC));
+ NewLshr->copyIRFlags(&I); // We can preserve 'exact'-ness.
+ return NewLshr;
}
+ assert(ShAmtC != 0 &&
+ "lshr X, 0 should be handled by simplifyLShrInst.");
}
}
@@ -1667,6 +1682,36 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
}
+ if (match(Op0, m_OneUse(m_NSWMul(m_Value(X), m_APInt(ShOp1))))) {
+ unsigned CommonZeros = std::min(ShOp1->countr_zero(), ShAmt);
+ if (CommonZeros != 0) {
+ APInt NewMulC = ShOp1->ashr(CommonZeros);
+ unsigned NewShAmtC = ShAmt - CommonZeros;
+      // if c is divisible by (1 << ShAmtC):
+      // ashr (mul nsw x, MulC), ShAmtC -> mul nsw x, (MulC >> ShAmtC)
+ if (NewShAmtC == 0) {
+ auto *NewMul =
+ BinaryOperator::CreateNSWMul(X, ConstantInt::get(Ty, NewMulC));
+ NewMul->setHasNoUnsignedWrap(
+ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap());
+ return NewMul;
+ }
+
+      // We can reduce things like ashr (mul nsw x, 6), 2 to ashr (mul nsw
+      // x, 3), 1
+ // TODO: What about if ALL uses can be simplified in this way? Is that
+ // likely enough to happen to justify even caring?
+ auto *NewMul = Builder.CreateMul(
+ X, ConstantInt::get(Ty, NewMulC), "",
+ /*NUW*/ cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap(),
+ /*NSW*/ true);
+ auto *NewAshr =
+ BinaryOperator::CreateAShr(NewMul, ConstantInt::get(Ty, NewShAmtC));
+ NewAshr->copyIRFlags(&I); // We can preserve 'exact'-ness.
+ return NewAshr;
+ }
+ }
+
if (match(Op0, m_OneUse(m_SExt(m_Value(X)))) &&
(Ty->isVectorTy() || shouldChangeType(Ty, X->getType()))) {
// ashr (sext X), C --> sext (ashr X, C')
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index fd5517b2d44b2..150783f0ac316 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -607,8 +607,8 @@ define <2 x i8> @ashr_known_pos_exact_vec(<2 x i8> %x, <2 x i8> %y) {
define i32 @reduce_shift(i32 %x) {
; CHECK-LABEL: @reduce_shift(
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
-; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[TMP1]], 2
; CHECK-NEXT: ret i32 [[SHR]]
;
%mul = mul nsw i32 %x, 12
@@ -644,8 +644,8 @@ define i32 @reduce_shift_wrong_mul(i32 %x) {
define i32 @reduce_shift_exact(i32 %x) {
; CHECK-LABEL: @reduce_shift_exact(
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[X:%.*]], 12
-; CHECK-NEXT: [[SHR:%.*]] = ashr exact i32 [[MUL]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[SHR:%.*]] = ashr exact i32 [[TMP1]], 2
; CHECK-NEXT: ret i32 [[SHR]]
;
%mul = mul nsw i32 %x, 12
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index a8a21154d6798..fd4a568a473e3 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -591,8 +591,8 @@ define i32 @shl_add_lshr_neg(i32 %x, i32 %y, i32 %z) {
define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
-; CHECK-NEXT: [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
-; CHECK-NEXT: [[T:%.*]] = lshr i32 [[M]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[X:%.*]], 32769
+; CHECK-NEXT: [[T:%.*]] = lshr i32 [[TMP1]], 15
; CHECK-NEXT: ret i32 [[T]]
;
%m = mul nuw i32 %x, 65538
@@ -1406,8 +1406,8 @@ define <2 x i8> @bool_add_lshr_vec_wrong_shift_amt(<2 x i1> %a, <2 x i1> %b) {
define i32 @reduce_shift(i32 %x) {
; CHECK-LABEL: @reduce_shift(
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 12
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP1]], 2
; CHECK-NEXT: ret i32 [[SHR]]
;
%mul = mul nuw i32 %x, 12
@@ -1443,8 +1443,8 @@ define i32 @reduce_shift_wrong_mul(i32 %x) {
define i32 @reduce_shift_exact(i32 %x) {
; CHECK-LABEL: @reduce_shift_exact(
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw i32 [[X:%.*]], 12
-; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[MUL]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[X:%.*]], 3
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[TMP1]], 2
; CHECK-NEXT: ret i32 [[SHR]]
;
%mul = mul nuw i32 %x, 12