[llvm] r275419 - [LV] Avoid unnecessary IV scalar-to-vector-to-scalar conversions
Matthew Simpson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 14 07:36:07 PDT 2016
Author: mssimpso
Date: Thu Jul 14 09:36:06 2016
New Revision: 275419
URL: http://llvm.org/viewvc/llvm-project?rev=275419&view=rev
Log:
[LV] Avoid unnecessary IV scalar-to-vector-to-scalar conversions
This patch prevents increases in the number of instructions, pre-instcombine,
due to induction variable scalarization. An increase in instructions can lead
to an increase in the compile-time required to simplify the induction
variables. We now maintain a new map for scalarized induction variables to
prevent us from converting between the scalar and vector forms.
This patch should resolve compile-time regressions seen after r274627.
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/test/Transforms/LoopVectorize/induction.ll
llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=275419&r1=275418&r2=275419&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu Jul 14 09:36:06 2016
@@ -403,12 +403,12 @@ protected:
/// to each vector element of Val. The sequence starts at StartIndex.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
- /// Compute a step vector like the above function, but scalarize the
- /// arithmetic instead. The results of the computation are inserted into a
- /// new vector with VF elements. \p Val is the initial value, \p Step is the
- /// size of the step, and \p StartIdx indicates the index of the increment
- /// from which to start computing the steps.
- Value *getScalarizedStepVector(Value *Val, int StartIdx, Value *Step);
+ /// Compute scalar induction steps. \p ScalarIV is the scalar induction
+ /// variable on which to base the steps, \p Step is the size of the step, and
+ /// \p EntryVal is the value from the original loop that maps to the steps.
+ /// Note that \p EntryVal doesn't have to be an induction variable (e.g., it
+ /// can be a truncate instruction).
+ void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal);
/// Create a vector induction phi node based on an existing scalar one. This
/// currently only works for integer induction variables with a constant
@@ -417,11 +417,11 @@ protected:
void createVectorIntInductionPHI(const InductionDescriptor &II,
VectorParts &Entry, IntegerType *TruncType);
- /// Widen an integer induction variable \p IV. If \p TruncType is provided,
- /// the induction variable will first be truncated to the specified type. The
+ /// Widen an integer induction variable \p IV. If \p Trunc is provided, the
+ /// induction variable will first be truncated to the corresponding type. The
/// widened values are placed in \p Entry.
void widenIntInduction(PHINode *IV, VectorParts &Entry,
- IntegerType *TruncType = nullptr);
+ TruncInst *Trunc = nullptr);
/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
@@ -572,6 +572,17 @@ protected:
PHINode *OldInduction;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
+
+ /// A map of induction variables from the original loop to their
+ /// corresponding VF * UF scalarized values in the vectorized loop. The
+ /// purpose of ScalarIVMap is similar to that of WidenMap. Whereas WidenMap
+ /// maps original loop values to their vector versions in the new loop,
+ /// ScalarIVMap maps induction variables from the original loop that are not
+ /// vectorized to their scalar equivalents in the vector loop. Maintaining a
+ /// separate map for scalarized induction variables allows us to avoid
+ /// unnecessary scalar-to-vector-to-scalar conversions.
+ DenseMap<Value *, SmallVector<Value *, 8>> ScalarIVMap;
+
/// Store instructions that should be predicated, as a pair
/// <StoreInst, Predicate>
SmallVector<std::pair<StoreInst *, Value *>, 4> PredicatedStores;
@@ -1889,7 +1900,7 @@ void InnerLoopVectorizer::createVectorIn
}
void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
- IntegerType *TruncType) {
+ TruncInst *Trunc) {
auto II = Legal->getInductionVars()->find(IV);
assert(II != Legal->getInductionVars()->end() && "IV is not an induction");
@@ -1897,6 +1908,9 @@ void InnerLoopVectorizer::widenIntInduct
auto ID = II->second;
assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
+ // If a truncate instruction was provided, get the smaller type.
+ auto *TruncType = Trunc ? cast<IntegerType>(Trunc->getType()) : nullptr;
+
// The step of the induction.
Value *Step = nullptr;
@@ -1939,20 +1953,21 @@ void InnerLoopVectorizer::widenIntInduct
}
}
- // If an induction variable is only used for counting loop iterations or
- // calculating addresses, it shouldn't be widened. Scalarize the step vector
- // to give InstCombine a better chance of simplifying it.
- if (VF > 1 && ValuesNotWidened->count(IV)) {
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getScalarizedStepVector(ScalarIV, VF * Part, Step);
- return;
- }
-
- // Finally, splat the scalar induction variable, and build the necessary step
- // vectors.
+ // Splat the scalar induction variable, and build the necessary step vectors.
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
+
+ // If an induction variable is only used for counting loop iterations or
+ // calculating addresses, it doesn't need to be widened. Create scalar steps
+ // that can be used by instructions we will later scalarize. Note that the
+ // addition of the scalar steps will not increase the number of instructions
+ // in the loop in the common case prior to InstCombine. We will be trading
+ // one vector extract for each scalar step.
+ if (VF > 1 && ValuesNotWidened->count(IV)) {
+ auto *EntryVal = Trunc ? cast<Value>(Trunc) : IV;
+ buildScalarSteps(ScalarIV, Step, EntryVal);
+ }
}
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
@@ -1983,27 +1998,25 @@ Value *InnerLoopVectorizer::getStepVecto
return Builder.CreateAdd(Val, Step, "induction");
}
-Value *InnerLoopVectorizer::getScalarizedStepVector(Value *Val, int StartIdx,
- Value *Step) {
+void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
+ Value *EntryVal) {
- // We can't create a vector with less than two elements.
+ // We shouldn't have to build scalar steps if we aren't vectorizing.
assert(VF > 1 && "VF should be greater than one");
// Get the value type and ensure it and the step have the same integer type.
- Type *ValTy = Val->getType()->getScalarType();
- assert(ValTy->isIntegerTy() && ValTy == Step->getType() &&
+ Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
+ assert(ScalarIVTy->isIntegerTy() && ScalarIVTy == Step->getType() &&
"Val and Step should have the same integer type");
- // Compute the scalarized step vector. We perform scalar arithmetic and then
- // insert the results into the step vector.
- Value *StepVector = UndefValue::get(ToVectorTy(ValTy, VF));
- for (unsigned I = 0; I < VF; ++I) {
- auto *Mul = Builder.CreateMul(ConstantInt::get(ValTy, StartIdx + I), Step);
- auto *Add = Builder.CreateAdd(Val, Mul);
- StepVector = Builder.CreateInsertElement(StepVector, Add, I);
- }
-
- return StepVector;
+ // Compute the scalar steps and save the results in ScalarIVMap.
+ for (unsigned Part = 0; Part < UF; ++Part)
+ for (unsigned I = 0; I < VF; ++I) {
+ auto *StartIdx = ConstantInt::get(ScalarIVTy, VF * Part + I);
+ auto *Mul = Builder.CreateMul(StartIdx, Step);
+ auto *Add = Builder.CreateAdd(ScalarIV, Mul);
+ ScalarIVMap[EntryVal].push_back(Add);
+ }
}
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
@@ -2459,8 +2472,14 @@ void InnerLoopVectorizer::vectorizeMemor
"Must be last index or loop invariant");
VectorParts &GEPParts = getVectorValue(GepOperand);
- Value *Index = GEPParts[0];
- Index = Builder.CreateExtractElement(Index, Zero);
+
+ // If GepOperand is an induction variable, and there's a scalarized
+ // version of it available, use it. Otherwise, we will need to create
+ // an extractelement instruction.
+ Value *Index = ScalarIVMap.count(GepOperand)
+ ? ScalarIVMap[GepOperand][0]
+ : Builder.CreateExtractElement(GEPParts[0], Zero);
+
Gep2->setOperand(i, Index);
Gep2->setName("gep.indvar.idx");
}
@@ -2663,11 +2682,17 @@ void InnerLoopVectorizer::scalarizeInstr
Cloned->setName(Instr->getName() + ".cloned");
// Replace the operands of the cloned instructions with extracted scalars.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- Value *Op = Params[op][Part];
- // Param is a vector. Need to extract the right lane.
- if (Op->getType()->isVectorTy())
- Op = Builder.CreateExtractElement(Op, Builder.getInt32(Width));
- Cloned->setOperand(op, Op);
+
+ // If the operand is an induction variable, and there's a scalarized
+ // version of it available, use it. Otherwise, we will need to create
+ // an extractelement instruction if vectorizing.
+ auto *NewOp = Params[op][Part];
+ auto *ScalarOp = Instr->getOperand(op);
+ if (ScalarIVMap.count(ScalarOp))
+ NewOp = ScalarIVMap[ScalarOp][VF * Part + Width];
+ else if (NewOp->getType()->isVectorTy())
+ NewOp = Builder.CreateExtractElement(NewOp, Builder.getInt32(Width));
+ Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -4160,8 +4185,7 @@ void InnerLoopVectorizer::vectorizeBlock
auto ID = Legal->getInductionVars()->lookup(OldInduction);
if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
ID.getConstIntStepValue()) {
- auto *TruncType = cast<IntegerType>(CI->getType());
- widenIntInduction(OldInduction, Entry, TruncType);
+ widenIntInduction(OldInduction, Entry, cast<TruncInst>(CI));
addMetadata(Entry, &I);
break;
}
Modified: llvm/trunk/test/Transforms/LoopVectorize/induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction.ll?rev=275419&r1=275418&r2=275419&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll Thu Jul 14 09:36:06 2016
@@ -1,6 +1,7 @@
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -73,6 +74,26 @@ loopexit:
; for (int i = 0; i < n; ++i)
; sum += a[i];
;
+; CHECK-LABEL: @scalarize_induction_variable_01(
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %[[i0:.+]] = add i64 %index, 0
+; CHECK: %[[i1:.+]] = add i64 %index, 1
+; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
+; UNROLL-NO-IC: %[[i1:.+]] = add i64 %index, 1
+; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
+; UNROLL-NO-IC: %[[i3:.+]] = add i64 %index, 3
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i3]]
+;
; IND-LABEL: @scalarize_induction_variable_01(
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
@@ -112,6 +133,34 @@ for.end:
; for (int i ; 0; i < n; i += 8)
; s += (a[i] + b[i] + 1.0f);
;
+; CHECK-LABEL: @scalarize_induction_variable_02(
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = shl i64 %index, 3
+; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
+; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
+; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
+; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
+; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
+; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
+; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
+; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
+;
; IND-LABEL: @scalarize_induction_variable_02(
; IND: vector.body:
; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
Modified: llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll?rev=275419&r1=275418&r2=275419&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/reverse_induction.ll Thu Jul 14 09:36:06 2016
@@ -8,21 +8,13 @@ target datalayout = "e-p:64:64:64-i1:8:8
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i64 %startval, %index
; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i64> undef, i64 %[[a0]], i64 0
; CHECK: %[[a1:.+]] = add i64 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i64> %[[v0]], i64 %[[a1]], i64 1
; CHECK: %[[a2:.+]] = add i64 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i64> %[[v1]], i64 %[[a2]], i64 2
; CHECK: %[[a3:.+]] = add i64 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i64> %[[v2]], i64 %[[a3]], i64 3
; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i64> undef, i64 %[[a4]], i64 0
; CHECK: %[[a5:.+]] = add i64 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i64> %[[v4]], i64 %[[a5]], i64 1
; CHECK: %[[a6:.+]] = add i64 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i64> %[[v5]], i64 %[[a6]], i64 2
; CHECK: %[[a7:.+]] = add i64 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i64> %[[v6]], i64 %[[a7]], i64 3
define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
entry:
@@ -48,21 +40,13 @@ loopend:
; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i128 %startval, %index
; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i128> undef, i128 %[[a0]], i64 0
; CHECK: %[[a1:.+]] = add i128 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i128> %[[v0]], i128 %[[a1]], i64 1
; CHECK: %[[a2:.+]] = add i128 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i128> %[[v1]], i128 %[[a2]], i64 2
; CHECK: %[[a3:.+]] = add i128 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i128> %[[v2]], i128 %[[a3]], i64 3
; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i128> undef, i128 %[[a4]], i64 0
; CHECK: %[[a5:.+]] = add i128 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i128> %[[v4]], i128 %[[a5]], i64 1
; CHECK: %[[a6:.+]] = add i128 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i128> %[[v5]], i128 %[[a6]], i64 2
; CHECK: %[[a7:.+]] = add i128 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i128> %[[v6]], i128 %[[a7]], i64 3
define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
entry:
@@ -88,21 +72,13 @@ loopend:
; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = sub i16 %startval, {{.*}}
; CHECK: %[[a0:.+]] = add i16 %offset.idx, 0
-; CHECK: %[[v0:.+]] = insertelement <4 x i16> undef, i16 %[[a0]], i64 0
; CHECK: %[[a1:.+]] = add i16 %offset.idx, -1
-; CHECK: %[[v1:.+]] = insertelement <4 x i16> %[[v0]], i16 %[[a1]], i64 1
; CHECK: %[[a2:.+]] = add i16 %offset.idx, -2
-; CHECK: %[[v2:.+]] = insertelement <4 x i16> %[[v1]], i16 %[[a2]], i64 2
; CHECK: %[[a3:.+]] = add i16 %offset.idx, -3
-; CHECK: %[[v3:.+]] = insertelement <4 x i16> %[[v2]], i16 %[[a3]], i64 3
; CHECK: %[[a4:.+]] = add i16 %offset.idx, -4
-; CHECK: %[[v4:.+]] = insertelement <4 x i16> undef, i16 %[[a4]], i64 0
; CHECK: %[[a5:.+]] = add i16 %offset.idx, -5
-; CHECK: %[[v5:.+]] = insertelement <4 x i16> %[[v4]], i16 %[[a5]], i64 1
; CHECK: %[[a6:.+]] = add i16 %offset.idx, -6
-; CHECK: %[[v6:.+]] = insertelement <4 x i16> %[[v5]], i16 %[[a6]], i64 2
; CHECK: %[[a7:.+]] = add i16 %offset.idx, -7
-; CHECK: %[[v7:.+]] = insertelement <4 x i16> %[[v6]], i16 %[[a7]], i64 3
define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
entry:
More information about the llvm-commits
mailing list