[PATCH] D37236: [InstCombine] add insertelement + shuffle demanded element fold

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 28 15:15:04 PDT 2017


spatel created this revision.
Herald added a subscriber: mcrosier.

I started drafting a patch for this as an add-on for simplify demanded elements that would have been similar to SimplifyMultipleUseDemandedBits(), but since I don't know if any more folds like this exist, it seems unnecessary to make a larger patch for this special case.

The problem arises because we (correctly, I think) bail out of SimplifyDemandedVectorElts() if a value has multiple uses. But then we call recognizeIdentityMask() which ignores undef shuffle mask elements. That means we might replace a shuffle with an insertelement that doesn't need to exist. At that point, there's no going back - we've lost the information (via the shuffle mask) that would have let any other transforms know that the insertelement was not actually needed. If we don't ignore undefs in recognizeIdentityMask(), we'll currently fail to simplify several other patterns, so chasing down all of those didn't seem like a better option.

This can manifest as bad codegen for things like horizontal ops (PR34111) because the bogus inserts in IR can't be ignored by the SLP vectorizer, and then the backend doesn't have enough pattern matching to see the redundancies:

  define <4 x float> @add_ps_002(<4 x float> %a) {
    %a0 = extractelement <4 x float> %a, i32 0
    %a1 = extractelement <4 x float> %a, i32 1
    %a2 = extractelement <4 x float> %a, i32 2
    %a3 = extractelement <4 x float> %a, i32 3
    %a0_again = extractelement <4 x float> %a, i32 0
    %a1_again = extractelement <4 x float> %a, i32 1
    %a2_again = extractelement <4 x float> %a, i32 2
    %a3_again = extractelement <4 x float> %a, i32 3
    %add01 = fadd float %a0, %a1
    %add23 = fadd float %a2, %a3
    %add01_again = fadd float %a0_again, %a1_again
    %add23_again = fadd float %a2_again, %a3_again
    %out0 = insertelement <4 x float> undef, float %add01, i32 0
    %out01 = insertelement <4 x float> %out0, float %add23, i32 1
    %out012 = insertelement <4 x float> %out01, float %add01_again, i32 2
    %out0123 = insertelement <4 x float> %out012, float %add23_again, i32 3
    %shuffle = shufflevector <4 x float> %out0123, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    ret <4 x float> %shuffle
  }

$ ./opt -instcombine -slp-vectorizer -instcombine -S hadd_bogus_undef.ll | ./llc -o - -mattr=avx

  vmovshdup	%xmm0, %xmm1    ## xmm1 = xmm0[1,1,3,3] <-- didn't need that...
  vaddss	%xmm1, %xmm0, %xmm1   <--- or that...
  vhaddps	%xmm0, %xmm0, %xmm0
  vinsertps	$32, %xmm1, %xmm0, %xmm0 ## xmm0 = xmm0[0,1],xmm1[0],xmm0[3]  <-- or that


https://reviews.llvm.org/D37236

Files:
  lib/Transforms/InstCombine/InstCombineVectorOps.cpp
  test/Transforms/InstCombine/vec_demanded_elts.ll


Index: test/Transforms/InstCombine/vec_demanded_elts.ll
===================================================================
--- test/Transforms/InstCombine/vec_demanded_elts.ll
+++ test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -146,9 +146,7 @@
 
 define <4 x float> @inselt_shuf_no_demand(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2
@@ -161,9 +159,7 @@
 
 define <4 x float> @inselt_shuf_no_demand_commute(float %a1, float %a2, float %a3) {
 ; CHECK-LABEL: @inselt_shuf_no_demand_commute(
-; CHECK-NEXT:    [[OUT1:%.*]] = insertelement <4 x float> undef, float %a1, i32 1
-; CHECK-NEXT:    [[OUT12:%.*]] = insertelement <4 x float> [[OUT1]], float %a2, i32 2
-; CHECK-NEXT:    ret <4 x float> [[OUT12]]
+; CHECK-NEXT:    ret <4 x float> undef
 ;
   %out1 = insertelement <4 x float> undef, float %a1, i32 1
   %out12 = insertelement <4 x float> %out1, float %a2, i32 2
Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1135,6 +1135,29 @@
   return true;
 }
 
+/// If a shuffle operand is an insert element to a lane that is not accessed
+/// by the shuffle, then bypass the insert element and use the insert's source
+/// operand vector directly. This is a specialization of a demanded elements
+/// fold that does not require hasOneUse().
+static Value *replaceInsertEltShuffleOperand(Value *ShufOp, bool IsRHS,
+                                             const SmallVectorImpl<int> &Mask) {
+  if (auto *InsElt = dyn_cast<InsertElementInst>(ShufOp)) {
+    if (auto *InsertIndexOp = dyn_cast<ConstantInt>(InsElt->getOperand(2))) {
+      // Translate the insert index to the shuffle's mask element values.
+      int InsertIndex = InsertIndexOp->getZExtValue();
+      if (IsRHS)
+        InsertIndex += ShufOp->getType()->getVectorNumElements();
+
+      // If the inserted value is not in the shuffle mask, then the
+      // insertelement does not affect the result of the shuffle.
+      if (none_of(Mask, [&](int Elt) { return Elt == InsertIndex; }))
+        return InsElt->getOperand(0);
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1158,6 +1181,17 @@
     MadeChange = true;
   }
 
+  // shuffle (insertelement V1, S, C), V2, M --> shuffle V1, V2, M
+  if (Value *NewOp = replaceInsertEltShuffleOperand(LHS, false, Mask)) {
+    SVI.setOperand(0, NewOp);
+    return &SVI;
+  }
+  // shuffle V1, (insertelement V2, S, C), M --> shuffle V1, V2, M
+  if (Value *NewOp = replaceInsertEltShuffleOperand(RHS, true, Mask)) {
+    SVI.setOperand(1, NewOp);
+    return &SVI;
+  }
+
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();
 
   // Canonicalize shuffle(x    ,x,mask) -> shuffle(x, undef,mask')


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37236.112970.patch
Type: text/x-patch
Size: 3374 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170828/36556b05/attachment.bin>


More information about the llvm-commits mailing list