[llvm] f62c8db - [Scalarizer] InsertElement handling w/ constant insert index

Mon Jul 6 03:20:07 PDT 2020

Author: Roman Lebedev
Date: 2020-07-06T13:19:32+03:00
New Revision: f62c8dbc99eaaac35506f655fdf4d7b1cc21c81c

URL: https://github.com/llvm/llvm-project/commit/f62c8dbc99eaaac35506f655fdf4d7b1cc21c81c
DIFF: https://github.com/llvm/llvm-project/commit/f62c8dbc99eaaac35506f655fdf4d7b1cc21c81c.diff

LOG: [Scalarizer] InsertElement handling w/ constant insert index

Summary: As can be clearly seen from the diff, this results in nicer IR.

Reviewers: jdoerfert, arsenm, bjope, cameron.mcinally

Reviewed By: jdoerfert

Subscribers: arphaman, wdng, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83102

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/Scalarizer.cpp
    llvm/test/Transforms/Scalarizer/basic.ll
    llvm/test/Transforms/Scalarizer/constant-insertelement.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2e414f78271f..6802a9101882 100644

--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -192,6 +192,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
   bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
   bool visitCastInst(CastInst &CI);
   bool visitBitCastInst(BitCastInst &BCI);
+  bool visitInsertElementInst(InsertElementInst &IEI);
   bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
   bool visitPHINode(PHINode &PHI);
   bool visitLoadInst(LoadInst &LI);
@@ -389,7 +390,7 @@ void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
   if (!SV.empty()) {
     for (unsigned I = 0, E = SV.size(); I != E; ++I) {
       Value *V = SV[I];
-      if (V == nullptr)
+      if (V == nullptr || SV[I] == CV[I])
         continue;
 
       Instruction *Old = cast<Instruction>(V);
@@ -740,6 +741,31 @@ bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
   return true;
 }
 
+bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
+  VectorType *VT = dyn_cast<VectorType>(IEI.getType());
+  if (!VT)
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  IRBuilder<> Builder(&IEI);
+  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
+  Value *NewElt = IEI.getOperand(1);
+  Value *InsIdx = IEI.getOperand(2);
+
+  ValueVector Res;
+  Res.resize(NumElems);
+
+  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
+    for (unsigned I = 0; I < NumElems; ++I)
+      Res[I] = CI->getValue().getZExtValue() == I ? NewElt : Op0[I];
+  } else {
+    return false;
+  }
+
+  gather(&IEI, Res);
+  return true;
+}
+
 bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   VectorType *VT = dyn_cast<VectorType>(SVI.getType());
   if (!VT)

diff  --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 2c82fd9cc3a5..2c7b6a6b588f 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -276,14 +276,14 @@ define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
 ; CHECK: %dest.i1 = getelementptr float*, float** %dest.i0, i32 1
 ; CHECK: %dest.i2 = getelementptr float*, float** %dest.i0, i32 2
 ; CHECK: %dest.i3 = getelementptr float*, float** %dest.i0, i32 3
+; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
+; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
+; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
 ; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
-; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
 ; CHECK: %val.i0 = getelementptr float, float* %ptr0.i0, i32 100
 ; CHECK: %val.i1 = getelementptr float, float* %other, i32 %i0.i1
-; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
 ; CHECK: %val.i2 = getelementptr float, float* %ptr0.i2, i32 100
-; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
 ; CHECK: %val.i3 = getelementptr float, float* %ptr0.i3, i32 %i0.i3
 ; CHECK: store float* %val.i0, float** %dest.i0, align 32
 ; CHECK: store float* %val.i1, float** %dest.i1, align 8

diff  --git a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
index 8e8b640e9577..4ddde3598334 100644
--- a/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
+++ b/llvm/test/Transforms/Scalarizer/constant-insertelement.ll
@@ -12,18 +12,9 @@ define <4 x i32> @f1(<4 x i32> *%src, i32 %repl, i32 %index) {
 ; ALL-NEXT:    [[VAL0_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
 ; ALL-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
 ; ALL-NEXT:    [[VAL0_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
-; ALL-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
-; ALL-NEXT:    [[VAL0_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
-; ALL-NEXT:    [[VAL0_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL0_I0]], i32 0
-; ALL-NEXT:    [[VAL0_UPTO1:%.*]] = insertelement <4 x i32> [[VAL0_UPTO0]], i32 [[VAL0_I1]], i32 1
-; ALL-NEXT:    [[VAL0_UPTO2:%.*]] = insertelement <4 x i32> [[VAL0_UPTO1]], i32 [[VAL0_I2]], i32 2
-; ALL-NEXT:    [[VAL0:%.*]] = insertelement <4 x i32> [[VAL0_UPTO2]], i32 [[VAL0_I3]], i32 3
-; ALL-NEXT:    [[VAL0_I01:%.*]] = extractelement <4 x i32> [[VAL0]], i32 0
-; ALL-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I01]]
-; ALL-NEXT:    [[VAL0_I12:%.*]] = extractelement <4 x i32> [[VAL0]], i32 1
-; ALL-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I12]]
-; ALL-NEXT:    [[VAL0_I23:%.*]] = extractelement <4 x i32> [[VAL0]], i32 2
-; ALL-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I23]]
+; ALL-NEXT:    [[VAL2_I0:%.*]] = shl i32 1, [[VAL0_I0]]
+; ALL-NEXT:    [[VAL2_I1:%.*]] = shl i32 2, [[VAL0_I1]]
+; ALL-NEXT:    [[VAL2_I2:%.*]] = shl i32 3, [[VAL0_I2]]
 ; ALL-NEXT:    [[VAL2_I3:%.*]] = shl i32 4, [[REPL:%.*]]
 ; ALL-NEXT:    [[VAL2_UPTO0:%.*]] = insertelement <4 x i32> undef, i32 [[VAL2_I0]], i32 0
 ; ALL-NEXT:    [[VAL2_UPTO1:%.*]] = insertelement <4 x i32> [[VAL2_UPTO0]], i32 [[VAL2_I1]], i32 1