[llvm] [InstCombine] Fold binary op of reductions. (PR #121567)
Mikhail Gudim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 3 04:54:32 PST 2025
https://github.com/mgudim created https://github.com/llvm/llvm-project/pull/121567
Replace a binary op of two reductions with one reduction of the binary op applied to the vectors. For example:
```
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
%res = add i32 %v0_red, %v1_red
```
gets transformed to:
```
%1 = add <16 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)
```
>From 71a72dd6072141b2d1ed7361b2eab94dec3fe4e3 Mon Sep 17 00:00:00 2001
From: Mikhail Gudim <mgudim at ventanamicro.com>
Date: Thu, 2 Jan 2025 08:26:22 -0800
Subject: [PATCH] [InstCombine] Fold binary op of reductions.
Replace a binary op of two reductions with one reduction of the binary op
applied to the vectors. For example:
```
%v0_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v0)
%v1_red = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %v1)
%res = add i32 %v0_red, %v1_red
```
gets transformed to:
```
%1 = add <16 x i32> %v0, %v1
%res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %1)
```
---
.../InstCombine/InstCombineAddSub.cpp | 18 ++-----
.../InstCombine/InstCombineAndOrXor.cpp | 9 ++++
.../InstCombine/InstCombineInternal.h | 1 +
.../InstCombine/InstCombineMulDivRem.cpp | 3 ++
.../InstCombine/InstructionCombining.cpp | 52 +++++++++++++++++++
5 files changed, 70 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7a184a19d7c54a..42e816d527fcff 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1516,6 +1516,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Instruction *X = foldVectorBinop(I))
return X;
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
@@ -2376,19 +2379,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
- auto m_AddRdx = [](Value *&Vec) {
- return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
- };
- Value *V0, *V1;
- if (match(Op0, m_AddRdx(V0)) && match(Op1, m_AddRdx(V1)) &&
- V0->getType() == V1->getType()) {
- // Difference of sums is sum of differences:
- // add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
- Value *Sub = Builder.CreateSub(V0, V1);
- Value *Rdx = Builder.CreateIntrinsic(Intrinsic::vector_reduce_add,
- {Sub->getType()}, {Sub});
- return replaceInstUsesWith(I, Rdx);
- }
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
if (Constant *C = dyn_cast<Constant>(Op0)) {
Value *X;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index e576eea4ca36a1..d9fcaf124d459b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2388,6 +2388,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *X = foldVectorBinop(I))
return X;
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
@@ -3588,6 +3591,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *X = foldVectorBinop(I))
return X;
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
@@ -4713,6 +4719,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *X = foldVectorBinop(I))
return X;
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 3a074ee70dc487..99301d3e991f55 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -594,6 +594,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
+ Instruction *foldBinopOfReductions(BinaryOperator &Inst);
Instruction *foldVectorSelect(SelectInst &Sel);
Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f85a3c93651353..98023c5eb89e42 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -205,6 +205,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (Instruction *X = foldVectorBinop(I))
return X;
+ if (Instruction *X = foldBinopOfReductions(I))
+ return replaceInstUsesWith(I, X);
+
if (Instruction *Phi = foldBinopWithPhiOperands(I))
return Phi;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 934156f04f7fdd..12c53e8a0869f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2296,6 +2296,58 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
return nullptr;
}
+/// Map a binary opcode to the vector.reduce.* intrinsic that reduces with the
+/// same operation. Opcodes that have no such reduction map to the
+/// Intrinsic::num_intrinsics sentinel.
+static Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc) {
+  if (Opc == Instruction::Add)
+    return Intrinsic::vector_reduce_add;
+  if (Opc == Instruction::Mul)
+    return Intrinsic::vector_reduce_mul;
+  if (Opc == Instruction::And)
+    return Intrinsic::vector_reduce_and;
+  if (Opc == Instruction::Or)
+    return Intrinsic::vector_reduce_or;
+  if (Opc == Instruction::Xor)
+    return Intrinsic::vector_reduce_xor;
+  // No matching reduction for this opcode.
+  return Intrinsic::num_intrinsics;
+}
+
+/// Fold a binary op of two same-kind, same-type vector reductions into one
+/// reduction of the lane-wise binary op, e.g.
+///   add_rdx(V0) + add_rdx(V1) --> add_rdx(V0 + V1)
+///   add_rdx(V0) - add_rdx(V1) --> add_rdx(V0 - V1)
+/// Each reduction must have Inst as its only user.
+/// Returns the new reduction, or nullptr if the pattern does not match.
+Instruction *InstCombinerImpl::foldBinopOfReductions(BinaryOperator &Inst) {
+  Instruction::BinaryOps BinOpOpc = Inst.getOpcode();
+  // A difference of sums is a sum of differences, so sub pairs with add_rdx.
+  Intrinsic::ID ReductionIID = BinOpOpc == Instruction::Sub
+                                   ? Intrinsic::vector_reduce_add
+                                   : getReductionForBinop(BinOpOpc);
+  if (ReductionIID == Intrinsic::num_intrinsics)
+    return nullptr;
+
+  // Both reductions must die with this fold; otherwise they stay live and the
+  // new vector op plus reduction only add instructions.
+  auto GetRdxInput = [ReductionIID](Value *V) -> Value * {
+    auto *II = dyn_cast<IntrinsicInst>(V);
+    if (!II || II->getIntrinsicID() != ReductionIID || !II->hasOneUse())
+      return nullptr;
+    return II->getArgOperand(0);
+  };
+  Value *V0 = GetRdxInput(Inst.getOperand(0));
+  Value *V1 = GetRdxInput(Inst.getOperand(1));
+  if (!V0 || !V1 || V0->getType() != V1->getType())
+    return nullptr;
+
+  // Do not copy nsw/nuw from Inst: no-wrap on the scalar op does not imply
+  // no-wrap in every lane of the vector op.
+  Value *VectorBO = Builder.CreateBinOp(BinOpOpc, V0, V1);
+  return Builder.CreateIntrinsic(ReductionIID, {V0->getType()}, {VectorBO});
+}
+
/// Try to narrow the width of a binop if at least 1 operand is an extend of
/// of a value. This requires a potentially expensive known bits check to make
/// sure the narrow op does not overflow.
More information about the llvm-commits
mailing list