[llvm] 129ae51 - [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V).
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 29 10:18:33 PDT 2021
Author: Alexey Bataev
Date: 2021-06-29T10:02:38-07:00
New Revision: 129ae515fba022353050e0f313b32595de9e4b39
URL: https://github.com/llvm/llvm-project/commit/129ae515fba022353050e0f313b32595de9e4b39
DIFF: https://github.com/llvm/llvm-project/commit/129ae515fba022353050e0f313b32595de9e4b39.diff
LOG: [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V).
After SLP + LTO we may have reduction(shuffle V, poison,
mask). This can be simplified to just reduction(V) if the mask uses only
a single source vector and merely permutes all elements of that vector,
without reusing elements or replacing them with undefs and/or other values.
Differential Revision: https://reviews.llvm.org/D105053
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index fb3dfd89895b..552de8b072e3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
@@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
replaceInstUsesWith(CI, Res);
return eraseInstFromFunction(CI);
}
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::vector_reduce_add:
+ case Intrinsic::vector_reduce_mul:
+ case Intrinsic::vector_reduce_xor:
+ case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_umin:
+ case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_fmax:
+ case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fadd:
+ case Intrinsic::vector_reduce_fmul: {
+ bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
+ IID != Intrinsic::vector_reduce_fmul) ||
+ II->hasAllowReassoc();
+ const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
+ IID == Intrinsic::vector_reduce_fmul)
+ ? 1
+ : 0;
+ Value *Arg = II->getArgOperand(ArgIdx);
+ Value *V;
+ ArrayRef<int> Mask;
+ if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
+ !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
+ !cast<ShuffleVectorInst>(Arg)->isSingleSource())
+ break;
+ int Sz = Mask.size();
+ SmallBitVector UsedIndices(Sz);
+ for (int Idx : Mask) {
+ if (Idx == UndefMaskElem || UsedIndices.test(Idx))
+ break;
+ UsedIndices.set(Idx);
+ }
+ // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
+ // other changes.
+ if (UsedIndices.all()) {
+ replaceUse(II->getOperandUse(ArgIdx), V);
+ return nullptr;
+ }
break;
}
default: {
diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
index 1ecdb386ac1a..cf43f1bd626d 100644
--- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
@@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) {
define i32 @reduce_or(<4 x i32> %x) {
; CHECK-LABEL: @reduce_or(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
@@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) {
define i32 @reduce_and(<4 x i32> %x) {
; CHECK-LABEL: @reduce_and(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) {
define i32 @reduce_xor(<4 x i32> %x) {
; CHECK-LABEL: @reduce_xor(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
@@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) {
define i32 @reduce_umax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umax(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
@@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) {
define i32 @reduce_umin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umin(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
@@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) {
define i32 @reduce_smax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smax(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
@@ -79,8 +73,7 @@ define i32 @reduce_smax(<4 x i32> %x) {
define i32 @reduce_smin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smin(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) {
define float @reduce_fmax(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmax(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
- %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
+ %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
ret float %res
}
define float @reduce_fmin(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmin(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) {
define float @reduce_fadd(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fadd(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) {
define float @reduce_fmul(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fmul(
-; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
More information about the llvm-commits
mailing list