[llvm] 432c199 - [InstCombine] move shuffle after min/max with same-shuffled operands

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue May 3 13:23:27 PDT 2022


Author: Sanjay Patel
Date: 2022-05-03T16:23:11-04:00
New Revision: 432c199e84730e4bb0aec964aa1d13f0226455ac

URL: https://github.com/llvm/llvm-project/commit/432c199e84730e4bb0aec964aa1d13f0226455ac
DIFF: https://github.com/llvm/llvm-project/commit/432c199e84730e4bb0aec964aa1d13f0226455ac.diff

LOG: [InstCombine] move shuffle after min/max with same-shuffled operands

This is an intrinsic version of the existing fold for binops.
As a first step, I only allowed min/max, but the code is set
up to make adding more intrinsics easy (with more or less than
2 arguments).

This (and possible follow-ups) are discussed in issue #46238.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 957b954c2afb8..f35800e9f83af 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1080,6 +1080,43 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
   return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
 }
 
+/// If all arguments of the intrinsic are unary shuffles with the same mask,
+/// try to shuffle after the intrinsic.
+static Instruction *
+foldShuffledIntrinsicOperands(IntrinsicInst *II,
+                              InstCombiner::BuilderTy &Builder) {
+  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
+  //       etc. Use llvm::isTriviallyVectorizable() and related to determine
+  //       which intrinsics are safe to shuffle?
+  if (!match(II, m_MaxOrMin(m_Value(), m_Value())))
+    return nullptr;
+
+  Value *X;
+  ArrayRef<int> Mask;
+  if (!match(II->getArgOperand(0),
+             m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
+    return nullptr;
+
+  // At least 1 operand must have 1 use because we are creating 2 instructions.
+  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
+    return nullptr;
+
+  // See if all arguments are shuffled with the same mask.
+  SmallVector<Value *, 4> NewArgs(II->arg_size());
+  NewArgs[0] = X;
+  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
+    if (!match(II->getArgOperand(i),
+               m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))))
+      return nullptr;
+    NewArgs[i] = X;
+  }
+
+  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
+  Value *NewIntrinsic =
+      Builder.CreateIntrinsic(II->getIntrinsicID(), X->getType(), NewArgs);
+  return new ShuffleVectorInst(NewIntrinsic, Mask);
+}
+
 /// CallInst simplification. This mostly only handles folding of intrinsic
 /// instructions. For normal calls, it allows visitCallBase to do the heavy
 /// lifting.
@@ -2622,6 +2659,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     break;
   }
   }
+
+  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
+    return Shuf;
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);

diff  --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index c41dae0e64e52..7fb4f80c5cadb 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -2364,9 +2364,8 @@ define i8 @umax_umax_reassoc_constantexpr_sink(i8 %x, i8 %y) {
 
 define <3 x i8> @smax_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT:    [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
@@ -2379,8 +2378,8 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
 ; CHECK-LABEL: @smin_unary_shuffle_ops_use_poison_mask_elt(
 ; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
 ; CHECK-NEXT:    call void @use_vec(<3 x i8> [[SX]])
-; CHECK-NEXT:    [[SY:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <3 x i8> [[TMP1]], <3 x i8> poison, <3 x i32> <i32 undef, i32 0, i32 2>
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 poison, i32 0, i32 2>
@@ -2392,10 +2391,10 @@ define <3 x i8> @smin_unary_shuffle_ops_use_poison_mask_elt(<3 x i8> %x, <3 x i8
 
 define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @umax_unary_shuffle_ops_use_widening(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    [[SY:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    call void @use_vec(<3 x i8> [[SY]])
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %sx = shufflevector <2 x i8> %x, <2 x i8> poison, <3 x i32> <i32 1, i32 0, i32 0>
@@ -2407,9 +2406,8 @@ define <3 x i8> @umax_unary_shuffle_ops_use_widening(<2 x i8> %x, <2 x i8> %y) {
 
 define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: @umin_unary_shuffle_ops_narrowing(
-; CHECK-NEXT:    [[SX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
-; CHECK-NEXT:    [[SY:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %sx = shufflevector <4 x i8> %x, <4 x i8> poison, <3 x i32> <i32 1, i32 0, i32 3>
@@ -2418,6 +2416,8 @@ define <3 x i8> @umin_unary_shuffle_ops_narrowing(<4 x i8> %x, <4 x i8> %y) {
   ret <3 x i8> %r
 }
 
+; negative test - must have 2 shuffles
+
 define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_unshuffled_op(
 ; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
@@ -2429,6 +2429,8 @@ define <3 x i8> @smax_unary_shuffle_ops_unshuffled_op(<3 x i8> %x, <3 x i8> %y)
   ret <3 x i8> %r
 }
 
+; negative test - must have identical masks
+
 define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_mask(
 ; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 0, i32 0, i32 2>
@@ -2442,6 +2444,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_mask(<3 x i8> %x, <3 x i8> %y) {
   ret <3 x i8> %r
 }
 
+; negative test - must be unary shuffles
+
 define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
 ; CHECK-LABEL: @smax_unary_shuffle_ops_wrong_shuf(
 ; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Z:%.*]], <3 x i32> <i32 1, i32 0, i32 3>
@@ -2455,6 +2459,8 @@ define <3 x i8> @smax_unary_shuffle_ops_wrong_shuf(<3 x i8> %x, <3 x i8> %y, <3
   ret <3 x i8> %r
 }
 
+; negative test - too many uses
+
 define <3 x i8> @smin_unary_shuffle_ops_uses(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @smin_unary_shuffle_ops_uses(
 ; CHECK-NEXT:    [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>


        


More information about the llvm-commits mailing list