[llvm] f444806 - [InstCombine] try to reduce shuffle with bitcasted operand

Thu Apr 2 10:50:12 PDT 2020

Author: Sanjay Patel
Date: 2020-04-02T13:44:50-04:00
New Revision: f4448063ccf1c2eda518ad326018276baff870d2

URL: https://github.com/llvm/llvm-project/commit/f4448063ccf1c2eda518ad326018276baff870d2
DIFF: https://github.com/llvm/llvm-project/commit/f4448063ccf1c2eda518ad326018276baff870d2.diff

LOG: [InstCombine] try to reduce shuffle with bitcasted operand

shuf (bitcast X), undef, Mask --> bitcast X'

The 'inverse shuffles' test (shuf_bitcast_operand) is a pattern
in the motivating examples from PR35454:
https://bugs.llvm.org/show_bug.cgi?id=35454
(see also D76727)

We can deal with this class of patterns in generic instcombine
because we are not creating any new shuffles, just a bitcast.

Alive2 proof:
http://volta.cs.utah.edu:8080/z/mwDUZf

Differential Revision: https://reviews.llvm.org/D76844

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 2d72696ee531..1e95f235b653 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1889,9 +1889,9 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
-  if (auto *V =
-          SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
-                                    SVI.getType(), SQ.getWithInstruction(&SVI)))
+  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
+  if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
+                                          SVI.getType(), ShufQuery))
     return replaceInstUsesWith(SVI, V);
 
   // shuffle x, x, mask --> shuffle x, undef, mask'
@@ -1899,6 +1899,32 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();
   ArrayRef<int> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+
+  // Peek through a bitcasted shuffle operand by scaling the mask. If the
+  // simulated shuffle can simplify, then this shuffle is unnecessary:
+  // shuf (bitcast X), undef, Mask --> bitcast X'
+  // TODO: This could be extended to allow length-changing shuffles and/or casts
+  //       to narrower elements. The transform might also be obsoleted if we
+  //       allowed canonicalization of bitcasted shuffles.
+  Value *X;
+  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
+      X->getType()->isVectorTy() && VWidth == LHSWidth &&
+      X->getType()->getVectorNumElements() >= VWidth) {
+    // Create the scaled mask constant.
+    Type *XType = X->getType();
+    unsigned XNumElts = XType->getVectorNumElements();
+    assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
+    unsigned ScaleFactor = XNumElts / VWidth;
+    SmallVector<int, 16> ScaledMask;
+    scaleShuffleMask(ScaleFactor, Mask, ScaledMask);
+
+    // If the shuffled source vector simplifies, cast that value to this
+    // shuffle's type.
+    if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
+                                            ScaledMask, XType, ShufQuery))
+      return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
+  }
+
   if (LHS == RHS) {
     assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
     // Remap any references to RHS to use LHS.

diff  --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
index 57b7f4c4dc1d..5af4f50a191f 100644
--- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
@@ -21,11 +21,12 @@ define void @test(<16 x i8> %w, i32* %o1, float* %o2) {
   ret void
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[S2]]
 ;
   %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -34,12 +35,14 @@ define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) {
   ret <4 x i16> %s2
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_uses(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT:    call void @use(<4 x i16> [[BC]])
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[S2]]
 ;
   %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -49,11 +52,12 @@ define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) {
   ret <4 x i16> %s2
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 undef>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[S2]]
 ;
   %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -66,9 +70,7 @@ define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) {
 
 define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) {
 ; CHECK-LABEL: @shuf_bitcast_operand(
-; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <16 x i8> [[S1]] to <4 x i32>
-; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    [[S2:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[S2]]
 ;
   %s1 = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
@@ -77,6 +79,8 @@ define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) {
   ret <4 x i32> %s2
 }
 
+; TODO: Could allow fold for length-changing shuffles.
+
 define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_change_type(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -90,6 +94,8 @@ define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) {
   ret <5 x i16> %s2
 }
 
+; TODO: Could allow fold for cast to narrow element.
+
 define <4 x i16> @splat_bitcast_operand_wider_src_elt(<2 x i32> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt(
 ; CHECK-NEXT:    [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> <i32 1, i32 undef>