[llvm] r290641 - [InstCombine] Canonicalize insert splat sequences into an insert + shuffle

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 27 16:18:08 PST 2016


Author: mkuper
Date: Tue Dec 27 18:18:08 2016
New Revision: 290641

URL: http://llvm.org/viewvc/llvm-project?rev=290641&view=rev
Log:
[InstCombine] Canonicalize insert splat sequences into an insert + shuffle

This adds a combine that canonicalizes a chain of insertelement instructions
that broadcasts a single value into one insertelement + a splat shufflevector.

This fixes PR31286.

Differential Revision: https://reviews.llvm.org/D27992

Added:
    llvm/trunk/test/Transforms/InstCombine/broadcast.ll
Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/trunk/test/Transforms/BBVectorize/X86/loop1.ll
    llvm/trunk/test/Transforms/BBVectorize/loop1.ll
    llvm/trunk/test/Transforms/BBVectorize/simple-int.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/operandorder.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=290641&r1=290640&r2=290641&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Tue Dec 27 18:18:08 2016
@@ -593,6 +593,73 @@ static bool isShuffleEquivalentToSelect(
   return true;
 }
 
+/// Turn a chain of inserts that splats a value into a canonical insert +
+/// shuffle splat. That is:
+/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
+/// shufflevector(insertelt(undef, %k, 0), undef, zero)
+///
+/// The original base vector X is dropped: the fold only fires when the chain
+/// overwrites every element, so none of X's elements survive.
+///
+/// \p InsElt is expected to be the last insert of the chain. Returns the new
+/// splat shuffle (not yet inserted into a block; its single-element insert
+/// operand is created before \p InsElt), or nullptr if the chain is not a
+/// full splat.
+static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
+  // We are interested in the last insert in a chain. So, if this insert
+  // has a single user, and that user is an insert, bail.
+  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
+    return nullptr;
+
+  VectorType *VT = cast<VectorType>(InsElt.getType());
+  unsigned NumElements = VT->getNumElements();
+
+  // Do not try to do this for a one-element vector, since that's a nop,
+  // and will cause an inf-loop.
+  if (NumElements == 1)
+    return nullptr;
+
+  Value *SplatVal = InsElt.getOperand(1);
+  InsertElementInst *CurrIE = &InsElt;
+  SmallVector<bool, 16> ElementPresent(NumElements, false);
+
+  // Walk the chain backwards, keeping track of which indices we inserted into,
+  // until we hit something that isn't an insert of the splatted value.
+  while (CurrIE) {
+    ConstantInt *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
+    if (!Idx || CurrIE->getOperand(1) != SplatVal)
+      return nullptr;
+
+    // An out-of-range index yields an undefined result (see LangRef) and must
+    // never be used to index ElementPresent.
+    if (Idx->uge(NumElements))
+      return nullptr;
+
+    // Check none of the intermediate steps have any additional uses.
+    if ((CurrIE != &InsElt) && !CurrIE->hasOneUse())
+      return nullptr;
+
+    ElementPresent[Idx->getZExtValue()] = true;
+    CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
+  }
+
+  // Make sure we've seen an insert into every element.
+  if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+    return nullptr;
+
+  // All right, create the insert + shuffle.
+  IntegerType *Int32Ty = Type::getInt32Ty(InsElt.getContext());
+  Instruction *InsertFirst = InsertElementInst::Create(
+      UndefValue::get(VT), SplatVal, ConstantInt::get(Int32Ty, 0), "",
+      &InsElt);
+
+  // An all-zero mask makes every lane of the result read element 0.
+  Constant *ZeroMask =
+      ConstantAggregateZero::get(VectorType::get(Int32Ty, NumElements));
+
+  return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
+}
+
 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
 /// --> shufflevector X, CVec', Mask'
 static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
@@ -754,6 +821,11 @@ Instruction *InstCombiner::visitInsertEl
   if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
     return Shuf;
 
+  // Turn a sequence of inserts that broadcasts a scalar into a single
+  // insert + shufflevector.
+  if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
+    return Broadcast;
+
   return nullptr;
 }
 

Modified: llvm/trunk/test/Transforms/BBVectorize/X86/loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/X86/loop1.ll?rev=290641&r1=290640&r2=290641&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/X86/loop1.ll (original)
+++ llvm/trunk/test/Transforms/BBVectorize/X86/loop1.ll Tue Dec 27 18:18:08 2016
@@ -38,7 +38,7 @@ for.body:
 ; CHECK-NEXT: insertelement
 ; CHECK-NEXT: fadd <2 x double>
 ; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
+; CHECK-NEXT: shufflevector
 ; CHECK-NEXT: fadd <2 x double>
 ; CHECK-NEXT: insertelement
 ; CHECK-NEXT: fmul <2 x double>

Modified: llvm/trunk/test/Transforms/BBVectorize/loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/loop1.ll?rev=290641&r1=290640&r2=290641&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/loop1.ll (original)
+++ llvm/trunk/test/Transforms/BBVectorize/loop1.ll Tue Dec 27 18:18:08 2016
@@ -46,10 +46,10 @@ for.body:
 ; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
 ; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
 ; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
-; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
-; CHECK: %add5.v.i1.2 = insertelement <2 x double> %add5.v.i1.1, double %0, i32 1
+; CHECK: %2 = insertelement <2 x double> undef, double %0, i32 0
+; CHECK: %add5.v.i1.2 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
 ; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
-; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %add5.v.i1.1, double %mul8, i32 1
+; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %2, double %mul8, i32 1
 ; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
 ; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
 ; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1

Modified: llvm/trunk/test/Transforms/BBVectorize/simple-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/BBVectorize/simple-int.ll?rev=290641&r1=290640&r2=290641&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/BBVectorize/simple-int.ll (original)
+++ llvm/trunk/test/Transforms/BBVectorize/simple-int.ll Tue Dec 27 18:18:08 2016
@@ -177,7 +177,7 @@ define double @testcopysign(double %A1,
 ; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
-; CHECK: %Y1.v.i1.2 = insertelement <2 x double> %X1.v.i0.1, double %A1, i32 1
+; CHECK: %Y1.v.i1.2 = shufflevector <2 x double> %X1.v.i0.1, <2 x double> undef, <2 x i32> zeroinitializer
 ; CHECK: %Y1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %X1, <2 x double> %Y1.v.i1.2)
 ; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
 ; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0

Added: llvm/trunk/test/Transforms/InstCombine/broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/broadcast.ll?rev=290641&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/broadcast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/broadcast.ll Tue Dec 27 18:18:08 2016
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; All four lanes are written in order, starting from undef.
+; CHECK-LABEL: good1
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good1(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; The order of the inserts does not matter as long as every lane is written.
+; CHECK-LABEL: good2
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good2(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 1
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 2
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 0
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; The base vector (here zeroinitializer) is irrelevant when every lane is
+; overwritten.
+; CHECK-LABEL: good3
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good3(float %arg) {
+  %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; The last insert may have a non-insert user; the CHECK lines expect the splat
+; shuffle to end up after the fadd.
+; CHECK-LABEL: good4
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[ADD:.*]] = fadd <4 x float> %[[INS]], %[[INS]]
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[ADD]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good4(float %arg) {
+  %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  %tmp7 = fadd <4 x float> %tmp6, %tmp6
+  ret <4 x float> %tmp7
+}
+
+; Lane 1 is written twice and lane 0 is never written.
+; CHECK-LABEL: bad1
+; CHECK-NOT: shufflevector
+define <4 x float> @bad1(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 1
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; Lane 1 is never written.
+; CHECK-LABEL: bad2
+; CHECK-NOT: shufflevector
+define <4 x float> @bad2(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp5 = insertelement <4 x float> %tmp, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; One of the inserts (%tmp4) writes a different scalar.
+; CHECK-LABEL: bad3
+; CHECK-NOT: shufflevector
+define <4 x float> @bad3(float %arg, float %arg2) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg2, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; A one-element vector is already its own splat.
+; CHECK-LABEL: bad4
+; CHECK-NOT: shufflevector
+define <1 x float> @bad4(float %arg) {
+  %tmp = insertelement <1 x float> undef, float %arg, i32 0
+  ret <1 x float> %tmp
+}
+
+; An intermediate insert (%tmp4) has a user outside the chain.
+; CHECK-LABEL: bad5
+; CHECK-NOT: shufflevector
+define <4 x float> @bad5(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  %tmp7 = fadd <4 x float> %tmp6, %tmp4
+  ret <4 x float> %tmp7
+}
+
+; One of the inserts (%tmp5) uses a non-constant index.
+; CHECK-LABEL: bad6
+; CHECK-NOT: shufflevector
+define <4 x float> @bad6(float %arg, i32 %k) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 %k
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/operandorder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/operandorder.ll?rev=290641&r1=290640&r2=290641&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/operandorder.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/operandorder.ll Tue Dec 27 18:18:08 2016
@@ -26,7 +26,7 @@ define void @shuffle_operands1(double *
 
 ; CHECK-LABEL: shuffle_preserve_broadcast
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {
@@ -51,7 +51,7 @@ ext:
 
 ; CHECK-LABEL: shuffle_preserve_broadcast2
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast2(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {
@@ -76,7 +76,7 @@ ext:
 
 ; CHECK-LABEL: shuffle_preserve_broadcast3
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast3(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {
@@ -102,7 +102,7 @@ ext:
 
 ; CHECK-LABEL: shuffle_preserve_broadcast4
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast4(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {
@@ -127,7 +127,7 @@ ext:
 
 ; CHECK-LABEL: shuffle_preserve_broadcast5
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast5(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {
@@ -153,7 +153,7 @@ ext:
 
 ; CHECK-LABEL: shuffle_preserve_broadcast6
 ; CHECK: %[[BCAST:[a-z0-9]+]] = insertelement <2 x double> undef, double %v0_1
-; CHECK:                      = insertelement <2 x double> %[[BCAST]], double %v0_1
+; CHECK:                      = shufflevector <2 x double> %[[BCAST]], <2 x double> undef, <2 x i32> zeroinitializer
 define void @shuffle_preserve_broadcast6(double * noalias %from,
                                         double * noalias %to,
                                         double %v1, double %v2) {




More information about the llvm-commits mailing list