[llvm] [InstCombine] Factorise Add and Min/Max using Distributivity (PR #101717)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 2 10:25:16 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-transforms

Author: Jorge Botto (jf-botto)

<details>
<summary>Changes</summary>

This PR fixes part of https://github.com/llvm/llvm-project/issues/92433.

It specifically adds the 4 cases mentioned in https://github.com/llvm/llvm-project/issues/92433#issuecomment-2117064459.

I've added 8 positive tests, 4 of which are mentioned in the comment above and 4 which are their commutative equivalents. Alive proof: https://alive2.llvm.org/ce/z/z6eFTb
I've also added 8 negative tests, because we want to make sure we do not optimise if the relevant flags are not present, since the optimisation wouldn't be sound in that case. Alive proof: https://alive2.llvm.org/ce/z/NvNjTD
I did have to make the integer types `i4` to keep Alive from timing out and to fit them all on one page.

---
Full diff: https://github.com/llvm/llvm-project/pull/101717.diff


3 Files Affected:

- (modified) llvm/include/llvm/IR/Operator.h (+3) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+94) 
- (added) llvm/test/Transforms/InstCombine/intrinsic-distributive.ll (+228) 


``````````diff
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index f63f54ef94107..ec8b3f4b6318f 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -123,6 +123,9 @@ class OverflowingBinaryOperator : public Operator {
     return NoWrapKind;
   }
 
+  /// Return true if the instruction is commutative.
+  bool isCommutative() const { return Instruction::isCommutative(getOpcode()); }
+
   static bool classof(const Instruction *I) {
     return I->getOpcode() == Instruction::Add ||
            I->getOpcode() == Instruction::Sub ||
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cc68fd4cf1c1b..8944eec2d63d4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1505,6 +1505,97 @@ foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
       ConstantInt::getTrue(ZeroUndef->getType()));
 }
 
+/// Return whether "X LOp (Y ROp Z)" is always equal to
+/// "(X LOp Y) ROp (X LOp Z)".
+static bool leftDistributesOverRightIntrinsic(Instruction::BinaryOps LOp,
+                                              bool hasNUW, bool hasNSW,
+                                              Intrinsic::ID ROp) {
+  switch (ROp) {
+  case Intrinsic::umax:
+    return hasNUW && LOp == Instruction::Add;
+  case Intrinsic::umin:
+    return hasNUW && LOp == Instruction::Add;
+  case Intrinsic::smax:
+    return hasNSW && LOp == Instruction::Add;
+  case Intrinsic::smin:
+    return hasNSW && LOp == Instruction::Add;
+  default:
+    return false;
+  }
+}
+
+// Attempts to factorise a common term
+// in an instruction that has the form "(A op' B) op (C op' D)",
+// where op is an intrinsic and op' is a binop.
+static Value *
+foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II,
+                                   InstCombiner::BuilderTy &Builder) {
+  Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
+  Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
+
+  OverflowingBinaryOperator *Op0 = dyn_cast<OverflowingBinaryOperator>(LHS);
+  OverflowingBinaryOperator *Op1 = dyn_cast<OverflowingBinaryOperator>(RHS);
+
+  if (!Op0 || !Op1)
+    return nullptr;
+
+  if (Op0->getOpcode() != Op1->getOpcode())
+    return nullptr;
+
+  if (!(Op0->hasNoUnsignedWrap() == Op1->hasNoUnsignedWrap()) ||
+      !(Op0->hasNoSignedWrap() == Op1->hasNoSignedWrap()))
+    return nullptr;
+
+  if (!Op0->hasOneUse() || !Op1->hasOneUse())
+    return nullptr;
+
+  Instruction::BinaryOps InnerOpcode =
+      static_cast<Instruction::BinaryOps>(Op0->getOpcode());
+  bool HasNUW = Op0->hasNoUnsignedWrap();
+  bool HasNSW = Op0->hasNoSignedWrap();
+
+  if (!InnerOpcode)
+    return nullptr;
+
+  if (!leftDistributesOverRightIntrinsic(InnerOpcode, HasNUW, HasNSW,
+                                         TopLevelOpcode))
+    return nullptr;
+
+  assert(II->isCommutative() && Op0->isCommutative() &&
+         "Only inner and outer commutative op codes are supported.");
+
+  Value *A = Op0->getOperand(0);
+  Value *B = Op0->getOperand(1);
+  Value *C = Op1->getOperand(0);
+  Value *D = Op1->getOperand(1);
+
+  if (A == C || A == D) {
+    if (A != C)
+      std::swap(C, D);
+
+    Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
+    BinaryOperator *NewBinop =
+        cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, A));
+    NewBinop->setHasNoSignedWrap(HasNSW);
+    NewBinop->setHasNoUnsignedWrap(HasNUW);
+    return NewBinop;
+  }
+
+  if (B == D || B == C) {
+    if (B != D)
+      std::swap(C, D);
+
+    Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
+    BinaryOperator *NewBinop =
+        cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
+    NewBinop->setHasNoSignedWrap(HasNSW);
+    NewBinop->setHasNoUnsignedWrap(HasNUW);
+    return NewBinop;
+  }
+
+  return nullptr;
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -1929,6 +2020,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       }
     }
 
+    if (Value *V = foldIntrinsicUsingDistributiveLaws(II, Builder))
+      return replaceInstUsesWith(*II, V);
+
     break;
   }
   case Intrinsic::bitreverse: {
diff --git a/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll b/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll
new file mode 100644
index 0000000000000..f58ce04cb6711
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/intrinsic-distributive.ll
@@ -0,0 +1,228 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s 2>&1 | FileCheck %s
+
+
+define i8 @umax_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MAX:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nuw i8 %b, %a
+  %add2 = add nuw i8 %c, %a
+  %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+define i8 @umax_of_add_nuw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nuw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MAX:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nuw i8 %a, %b
+  %add2 = add nuw i8 %a, %c
+  %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+
+; negative test
+define i8 @umax_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nsw i8 %b, %a
+  %add2 = add nsw i8 %c, %a
+  %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+; negative test
+define i8 @umax_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umax_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i8 @llvm.umax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add i8 %b, %a
+  %add2 = add i8 %c, %a
+  %max = call i8 @llvm.umax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+define i8 @umin_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MIN:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nuw i8 %b, %a
+  %add2 = add nuw i8 %c, %a
+  %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+define i8 @umin_of_add_nuw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nuw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MIN:%.*]] = add nuw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nuw i8 %a, %b
+  %add2 = add nuw i8 %a, %c
+  %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+; negative test
+define i8 @umin_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nsw i8 %b, %a
+  %add2 = add nsw i8 %c, %a
+  %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+; negative test
+define i8 @umin_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @umin_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i8 @llvm.umin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add i8 %b, %a
+  %add2 = add i8 %c, %a
+  %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+; negative test
+define i8 @smax_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nuw i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nuw i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nuw i8 %b, %a
+  %add2 = add nuw i8 %c, %a
+  %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+define i8 @smax_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MAX:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nsw i8 %b, %a
+  %add2 = add nsw i8 %c, %a
+  %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+define i8 @smax_of_add_nsw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add_nsw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MAX:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add nsw i8 %a, %b
+  %add2 = add nsw i8 %a, %c
+  %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+; negative test
+define i8 @smax_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smax_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i8 @llvm.smax.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MAX]]
+;
+  %add1 = add i8 %b, %a
+  %add2 = add i8 %c, %a
+  %max = call i8 @llvm.smax.i8(i8 %add1, i8 %add2)
+  ret i8 %max
+}
+
+; negative test
+define i8 @smin_of_add_nuw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nuw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add nuw i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nuw i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nuw i8 %b, %a
+  %add2 = add nuw i8 %c, %a
+  %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+define i8 @smin_of_add_nsw(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nsw(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MIN:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nsw i8 %b, %a
+  %add2 = add nsw i8 %c, %a
+  %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+define i8 @smin_of_add_nsw_comm(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add_nsw_comm(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[B]], i8 [[C]])
+; CHECK-NEXT:    [[MIN:%.*]] = add nsw i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add nsw i8 %a, %b
+  %add2 = add nsw i8 %a, %c
+  %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}
+
+; negative test
+define i8 @smin_of_add(i8 %a, i8 %b, i8 %c) {
+; CHECK-LABEL: define i8 @smin_of_add(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) {
+; CHECK-NEXT:    [[ADD1:%.*]] = add i8 [[B]], [[A]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add i8 [[C]], [[A]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i8 @llvm.smin.i8(i8 [[ADD1]], i8 [[ADD2]])
+; CHECK-NEXT:    ret i8 [[MIN]]
+;
+  %add1 = add i8 %b, %a
+  %add2 = add i8 %c, %a
+  %min = call i8 @llvm.smin.i8(i8 %add1, i8 %add2)
+  ret i8 %min
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/101717


More information about the llvm-commits mailing list