[llvm] 129ae51 - [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V).

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 29 10:18:33 PDT 2021


Author: Alexey Bataev
Date: 2021-06-29T10:02:38-07:00
New Revision: 129ae515fba022353050e0f313b32595de9e4b39

URL: https://github.com/llvm/llvm-project/commit/129ae515fba022353050e0f313b32595de9e4b39
DIFF: https://github.com/llvm/llvm-project/commit/129ae515fba022353050e0f313b32595de9e4b39.diff

LOG: [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V).

After SLP + LTO we may have reduction(shuffle V, poison, mask). This can
be simplified to just reduction(V) if the mask reads from a single source
vector only and is a pure permutation of that vector, i.e. every element
is used exactly once, with no repeated elements, no undef elements, and
no elements replaced by other values.

Differential Revision: https://reviews.llvm.org/D105053

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/reduction-shufflevector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index fb3dfd89895b..552de8b072e3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
@@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         replaceInstUsesWith(CI, Res);
         return eraseInstFromFunction(CI);
       }
+    LLVM_FALLTHROUGH;
+  }
+  case Intrinsic::vector_reduce_add:
+  case Intrinsic::vector_reduce_mul:
+  case Intrinsic::vector_reduce_xor:
+  case Intrinsic::vector_reduce_umax:
+  case Intrinsic::vector_reduce_umin:
+  case Intrinsic::vector_reduce_smax:
+  case Intrinsic::vector_reduce_smin:
+  case Intrinsic::vector_reduce_fmax:
+  case Intrinsic::vector_reduce_fmin:
+  case Intrinsic::vector_reduce_fadd:
+  case Intrinsic::vector_reduce_fmul: {
+    bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
+                              IID != Intrinsic::vector_reduce_fmul) ||
+                             II->hasAllowReassoc();
+    const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
+                             IID == Intrinsic::vector_reduce_fmul)
+                                ? 1
+                                : 0;
+    Value *Arg = II->getArgOperand(ArgIdx);
+    Value *V;
+    ArrayRef<int> Mask;
+    if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
+        !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
+        !cast<ShuffleVectorInst>(Arg)->isSingleSource())
+      break;
+    int Sz = Mask.size();
+    SmallBitVector UsedIndices(Sz);
+    for (int Idx : Mask) {
+      if (Idx == UndefMaskElem || UsedIndices.test(Idx))
+        break;
+      UsedIndices.set(Idx);
+    }
+    // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
+    // other changes.
+    if (UsedIndices.all()) {
+      replaceUse(II->getOperandUse(ArgIdx), V);
+      return nullptr;
+    }
     break;
   }
   default: {

diff  --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
index 1ecdb386ac1a..cf43f1bd626d 100644
--- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
@@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) {
 
 define i32 @reduce_or(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_or(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
@@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) {
 
 define i32 @reduce_and(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_and(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) {
 
 define i32 @reduce_xor(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_xor(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
@@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) {
 
 define i32 @reduce_umax(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_umax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
@@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) {
 
 define i32 @reduce_umin(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_umin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
@@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) {
 
 define i32 @reduce_smax(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_smax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
@@ -79,8 +73,7 @@ define i32 @reduce_smax(<4 x i32> %x) {
 
 define i32 @reduce_smin(<4 x i32> %x) {
 ; CHECK-LABEL: @reduce_smin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) {
 
 define float @reduce_fmax(<4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmax(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
-  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
+  %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
   ret float %res
 }
 
 define float @reduce_fmin(<4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmin(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) {
 
 define float @reduce_fadd(float %a, <4 x float> %x) {
 ; CHECK-LABEL: @reduce_fadd(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) {
 
 define float @reduce_fmul(float %a, <4 x float> %x) {
 ; CHECK-LABEL: @reduce_fmul(
-; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
-; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
+; CHECK-NEXT:    [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret float [[RES]]
 ;
   %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>


        


More information about the llvm-commits mailing list