[llvm] [InstCombine] Factorise Add and Min/Max using Distributivity (PR #101717)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 10:25:16 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-llvm-transforms
Author: Jorge Botto (jf-botto)
<details>
<summary>Changes</summary>
This PR fixes part of https://github.com/llvm/llvm-project/issues/92433.
It specifically adds the 4 cases mentioned in https://github.com/llvm/llvm-project/issues/92433#issuecomment-2117064459.
I've added 8 positive tests, 4 of which are mentioned in the comment above and 4 which are their commutative equivalents. Alive proof: https://alive2.llvm.org/ce/z/z6eFTb
I've also added 8 negative tests, because we want to make sure we do not optimise if the relevant flags are not relevant because the optimisation wouldn't be sound. Alive proof: https://alive2.llvm.org/ce/z/NvNjTD
I did have to make the integer types `i4` to make Alive not timeout and to fit them all on one page.
---
Full diff: https://github.com/llvm/llvm-project/pull/101717.diff
3 Files Affected:
- (modified) llvm/include/llvm/IR/Operator.h (+3)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+94)
- (added) llvm/test/Transforms/InstCombine/intrinsic-distributive.ll (+228)
``````````diff
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index f63f54ef94107..ec8b3f4b6318f 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -123,6 +123,9 @@ class OverflowingBinaryOperator : public Operator {
return NoWrapKind;
}
+ /// Return true if the instruction is commutative:
+ bool isCommutative() const { return Instruction::isCommutative(getOpcode()); }
+
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::Add ||
I->getOpcode() == Instruction::Sub ||
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cc68fd4cf1c1b..8944eec2d63d4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1505,6 +1505,97 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
ConstantInt::getTrue(ZeroUndef->getType()));
}
+/// Return whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool leftDistributesOverRightIntrinsic(Instruction::BinaryOps LOp,
+ bool hasNUW, bool hasNSW,
+ Intrinsic::ID ROp) {
+ switch (ROp) {
+ case Intrinsic::umax:
+ return hasNUW && LOp == Instruction::Add;
+ case Intrinsic::umin:
+ return hasNUW && LOp == Instruction::Add;
+ case Intrinsic::smax:
+ return hasNSW && LOp == Instruction::Add;
+ case Intrinsic::smin:
+ return hasNSW && LOp == Instruction::Add;
+ default:
+ return false;
+ }
+}
+
+// Attempts to factorise a common term
+// in an instruction that has the form "(A op' B) op (C op' D)
+// where op is an intrinsic and op' is a binop
+static Value *
+foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+ Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
+
+ OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(LHS);
+ OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(RHS);
+
+ if (!Op0 || !Op1)
+ return nullptr;
+
+ if (Op0->getOpcode() != Op1->getOpcode())
+ return nullptr;
+
+ if (!(Op0->hasNoUnsignedWrap() == Op1->hasNoUnsignedWrap()) ||
+ !(Op0->hasNoSignedWrap() == Op1->hasNoSignedWrap()))
+ return nullptr;
+
+ if (!Op0->hasOneUse() || !Op1->hasOneUse())
+ return nullptr;
+
+ Instruction::BinaryOps InnerOpcode =
+ static_cast<Instruction::BinaryOps>(Op0->getOpcode());
+ bool HasNUW = Op0->hasNoUnsignedWrap();
+ bool HasNSW = Op0->hasNoSignedWrap();
+
+ if (!InnerOpcode)
+ return nullptr;
+
+ if (!leftDistributesOverRightIntrinsic(InnerOpcode, HasNUW, HasNSW,
+ TopLevelOpcode))
+ return nullptr;
+
+ assert(II->isCommutative() && Op0->isCommutative() &&
+ "Only inner and outer commutative op codes are supported.");
+
+ Value *A = Op0->getOperand(0);
+ Value *B = Op0->getOperand(1);
+ Value *C = Op1->getOperand(0);
+ Value *D = Op1->getOperand(1);
+
+ if (A == C || A == D) {
+ if (A != C)
+ std::swap(C, D);
+
+ Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
+ BinaryOperator *NewBinop =
+ cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, A));
+ NewBinop->setHasNoSignedWrap(HasNSW);
+ NewBinop->setHasNoUnsignedWrap(HasNUW);
+ return NewBinop;
+ }
+
+ if (B == D || B == C) {
+ if (B != D)
+ std::swap(C, D);
+
+ Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
+ BinaryOperator *NewBinop =
+ cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
+ NewBinop->setHasNoSignedWrap(HasNSW);
+ NewBinop->setHasNoUnsignedWrap(HasNUW);
+ return NewBinop;
+ }
+
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -1929,6 +2020,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
+ if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
+ return replaceInstUsesWith(*II, V);
+
break;
}
case Intrinsic::bitreverse: {
diff --git a/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll b/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll
new file mode 100644
index 0000000000000..f58ce04cb6711
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s 2>&1 | FileCheck %s
+
+
+define i8 @umax_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MAX:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nuw i8 %b, %a
+ %add2 = add nuw i8 %c, %a
+ %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+define i8 @umax_of_add_nuw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nuw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MAX:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nuw i8 %a, %b
+ %add2 = add nuw i8 %a, %c
+ %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+
+; negative test
+define i8 @umax_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i8 [[C]], [[A]]
+; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nsw i8 %b, %a
+ %add2 = add nsw i8 %c, %a
+ %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+; negative test
+define i8 @umax_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add i8 %b, %a
+ %add2 = add i8 %c, %a
+ %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+define i8 @umin_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MIN:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nuw i8 %b, %a
+ %add2 = add nuw i8 %c, %a
+ %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+define i8 @umin_of_add_nuw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nuw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MIN:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nuw i8 %a, %b
+ %add2 = add nuw i8 %a, %c
+ %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+; negative test
+define i8 @umin_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add nsw i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nsw i8 [[C]], [[A]]
+; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nsw i8 %b, %a
+ %add2 = add nsw i8 %c, %a
+ %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+; negative test
+define i8 @umin_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add i8 %b, %a
+ %add2 = add i8 %c, %a
+ %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+; negative test
+define i8 @smax_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add nuw i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw i8 [[C]], [[A]]
+; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nuw i8 %b, %a
+ %add2 = add nuw i8 %c, %a
+ %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+define i8 @smax_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MAX:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nsw i8 %b, %a
+ %add2 = add nsw i8 %c, %a
+ %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+define i8 @smax_of_add_nsw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nsw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MAX:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add nsw i8 %a, %b
+ %add2 = add nsw i8 %a, %c
+ %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+; negative test
+define i8 @smax_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT: [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MAX]]
+;
+ %add1 = add i8 %b, %a
+ %add2 = add i8 %c, %a
+ %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+ ret i8 %max
+}
+
+; negative test
+define i8 @smin_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add nuw i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add nuw i8 [[C]], [[A]]
+; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nuw i8 %b, %a
+ %add2 = add nuw i8 %c, %a
+ %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+define i8 @smin_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MIN:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nsw i8 %b, %a
+ %add2 = add nsw i8 %c, %a
+ %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+define i8 @smin_of_add_nsw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nsw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT: [[MIN:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add nsw i8 %a, %b
+ %add2 = add nsw i8 %a, %c
+ %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
+
+; negative test
+define i8 @smin_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT: [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT: [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT: [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT: ret i8 [[MIN]]
+;
+ %add1 = add i8 %b, %a
+ %add2 = add i8 %c, %a
+ %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+ ret i8 %min
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/101717
More information about the llvm-commits
mailing list