[PATCH] D63848: [InstCombine] allow undef elements when forming splat from chain of insertelements

Sanjay Patel via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 26 15:48:24 PDT 2019


spatel created this revision.
spatel added reviewers: efriedma, RKSimon, lebedev.ri.
Herald added subscribers: hiraditya, mcrosier.
Herald added a project: LLVM.

We allow forming a splat (broadcast) shuffle, but we were conservatively limiting that to cases where all elements of the vector are specified. It should be safe from a codegen perspective to allow undefined lanes of the vector because the expansion of a splat shuffle would become the chain of inserts again.

Forming splat shuffles can reduce IR and help enable further IR transforms. Motivating bugs:
https://bugs.llvm.org/show_bug.cgi?id=42174
https://bugs.llvm.org/show_bug.cgi?id=16739 (this one needs a lot of changes, and I'm still not sure how to fix it)


https://reviews.llvm.org/D63848

Files:
  llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
  llvm/test/Transforms/InstCombine/broadcast.ll


Index: llvm/test/Transforms/InstCombine/broadcast.ll
===================================================================
--- llvm/test/Transforms/InstCombine/broadcast.ll
+++ llvm/test/Transforms/InstCombine/broadcast.ll
@@ -72,11 +72,12 @@
   ret <4 x float> %res
 }
 
-define <4 x float> @bad1(float %arg) {
-; CHECK-LABEL: @bad1(
-; CHECK-NEXT:    [[T4:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 1
-; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
-; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; The insert is changed to allow the canonical shuffle-splat pattern from element 0.
+
+define <4 x float> @splat_undef1(float %arg) {
+; CHECK-LABEL: @splat_undef1(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
+; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    ret <4 x float> [[T6]]
 ;
   %t = insertelement <4 x float> undef, float %arg, i32 1
@@ -86,11 +87,12 @@
   ret <4 x float> %t6
 }
 
-define <4 x float> @bad2(float %arg) {
-; CHECK-LABEL: @bad2(
+; Re-uses the existing first insertelement.
+
+define <4 x float> @splat_undef2(float %arg) {
+; CHECK-LABEL: @splat_undef2(
 ; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
-; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 2
-; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
 ; CHECK-NEXT:    ret <4 x float> [[T6]]
 ;
   %t = insertelement <4 x float> undef, float %arg, i32 0
@@ -123,10 +125,13 @@
   ret <1 x float> %t
 }
 
-define <4 x float> @bad5(float %arg) {
-; CHECK-LABEL: @bad5(
+; Multiple undef elements are ok.
+; TODO: Multiple uses triggers the transform at %t4, but we could form another splat from %t6 and simplify?
+
+define <4 x float> @splat_undef3(float %arg) {
+; CHECK-LABEL: @splat_undef3(
 ; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
-; CHECK-NEXT:    [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1
+; CHECK-NEXT:    [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
 ; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
 ; CHECK-NEXT:    [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -704,10 +704,18 @@
     CurrIE = NextIE;
   }
 
-  // Make sure we've seen an insert into every element.
-  if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+  // If this is just a single insertelement (not a sequence), we are done.
+  if (FirstIE == &InsElt)
     return nullptr;
 
+  // If we are not inserting into an undef vector, make sure we've seen an
+  // insert into every element.
+  // TODO: If the base vector is not undef, it might be better to create a splat
+  //       and then a select-shuffle (blend) with the base vector.
+  if (!isa<UndefValue>(FirstIE->getOperand(0)))
+    if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+      return nullptr;
+
   // Create the insert + shuffle.
   Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
   UndefValue *UndefVec = UndefValue::get(VecTy);
@@ -715,8 +723,13 @@
   if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
     FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
 
-  Constant *ZeroMask = ConstantVector::getSplat(NumElements, Zero);
-  return new ShuffleVectorInst(FirstIE, UndefVec, ZeroMask);
+  // Splat from element 0, but replace absent elements with undef in the mask.
+  SmallVector<Constant *, 16> Mask(NumElements, Zero);
+  for (unsigned i = 0; i != NumElements; ++i)
+    if (!ElementPresent[i])
+      Mask[i] = UndefValue::get(Int32Ty);
+
+  return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
 }
 
 /// If we have an insertelement instruction feeding into another insertelement


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D63848.206756.patch
Type: text/x-patch
Size: 4513 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190626/44a46a26/attachment.bin>


More information about the llvm-commits mailing list