[llvm] r276554 - [Loop Vectorizer] Handling loops with FP induction variables.
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 24 00:24:54 PDT 2016
Author: delena
Date: Sun Jul 24 02:24:54 2016
New Revision: 276554
URL: http://llvm.org/viewvc/llvm-project?rev=276554&view=rev
Log:
[Loop Vectorizer] Handling loops with FP induction variables.
Allows vectorization of loops with secondary floating-point induction variables (FP IVs), for example:
float *A;
float x = init;
for (int i=0; i < N; ++i) {
A[i] = x;
x -= fp_inc;
}
Auto-vectorization is possible when the induction binary operator is marked "fast" or the function carries a relaxed FP-math attribute (the tests added here use the "no-nans-fp-math" function attribute).
Differential Revision: https://reviews.llvm.org/D21330
Added:
llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll
Modified:
llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=276554&r1=276553&r2=276554&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Sun Jul 24 02:24:54 2016
@@ -263,13 +263,15 @@ public:
enum InductionKind {
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_FpInduction ///< Floating point induction variable.
};
public:
/// Default constructor - creates an invalid induction.
InductionDescriptor()
- : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {}
+ : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr),
+ InductionBinOp(nullptr) {}
/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
@@ -291,26 +293,58 @@ public:
const SCEV *getStep() const { return Step; }
ConstantInt *getConstIntStepValue() const;
- /// Returns true if \p Phi is an induction. If \p Phi is an induction,
- /// the induction descriptor \p D will contain the data describing this
- /// induction. If by some other means the caller has a better SCEV
+ /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
+ /// induction, the induction descriptor \p D will contain the data describing
+ /// this induction. If by some other means the caller has a better SCEV
/// expression for \p Phi than the one returned by the ScalarEvolution
/// analysis, it can be passed through \p Expr.
- static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
InductionDescriptor &D,
const SCEV *Expr = nullptr);
- /// Returns true if \p Phi is an induction, in the context associated with
- /// the run-time predicate of PSE. If \p Assume is true, this can add further
- /// SCEV predicates to \p PSE in order to prove that \p Phi is an induction.
+ /// Returns true if \p Phi is a floating point induction in the loop \p L.
+ /// If \p Phi is an induction, the induction descriptor \p D will contain
+ /// the data describing this induction.
+ static bool isFPInductionPHI(PHINode *Phi, const Loop* L,
+ ScalarEvolution *SE, InductionDescriptor &D);
+
+ /// Returns true if \p Phi is a loop \p L induction, in the context associated
+ /// with the run-time predicate of PSE. If \p Assume is true, this can add
+ /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
+ /// induction.
/// If \p Phi is an induction, \p D will contain the data describing this
/// induction.
- static bool isInductionPHI(PHINode *Phi, PredicatedScalarEvolution &PSE,
+ static bool isInductionPHI(PHINode *Phi, const Loop* L,
+ PredicatedScalarEvolution &PSE,
InductionDescriptor &D, bool Assume = false);
+ /// Returns true if the induction type is FP and the binary operator does
+ /// not have the "fast-math" property. Such operation requires a relaxed FP
+ /// mode.
+ bool hasUnsafeAlgebra() {
+ return InductionBinOp &&
+ !cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra();
+ }
+
+ /// Returns induction operator that does not have "fast-math" property
+ /// and requires FP unsafe mode.
+ Instruction *getUnsafeAlgebraInst() {
+ if (!InductionBinOp ||
+ cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra())
+ return nullptr;
+ return InductionBinOp;
+ }
+
+ /// Returns binary opcode of the induction operator.
+ Instruction::BinaryOps getInductionOpcode() const {
+ return InductionBinOp ? InductionBinOp->getOpcode() :
+ Instruction::BinaryOpsEnd;
+ }
+
private:
/// Private constructor - used by \c isInductionPHI.
- InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step);
+ InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
+ BinaryOperator *InductionBinOp = nullptr);
/// Start value.
TrackingVH<Value> StartValue;
@@ -318,6 +352,8 @@ private:
InductionKind IK;
/// Step value.
const SCEV *Step;
+ // Instruction that advances induction variable.
+ BinaryOperator *InductionBinOp;
};
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
Modified: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp?rev=276554&r1=276553&r2=276554&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp Sun Jul 24 02:24:54 2016
@@ -703,7 +703,7 @@ bool LoopInterchangeLegality::findInduct
RecurrenceDescriptor RD;
InductionDescriptor ID;
PHINode *PHI = cast<PHINode>(I);
- if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
+ if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID))
Inductions.push_back(PHI);
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
Modified: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp?rev=276554&r1=276553&r2=276554&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp Sun Jul 24 02:24:54 2016
@@ -654,8 +654,8 @@ Value *RecurrenceDescriptor::createMinMa
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step)
- : StartValue(Start), IK(K), Step(Step) {
+ const SCEV *Step, BinaryOperator *BOp)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
@@ -672,7 +672,15 @@ InductionDescriptor::InductionDescriptor
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
"Step value should be constant for pointer induction");
- assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction || (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
}
int InductionDescriptor::getConsecutiveDirection() const {
@@ -693,6 +701,8 @@ Value *InductionDescriptor::transform(IR
const DataLayout& DL) const {
SCEVExpander Exp(*SE, DL, "induction");
+ assert(Index->getType() == Step->getType() &&
+ "Index type does not match StepValue type");
switch (IK) {
case IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
@@ -717,29 +727,113 @@ Value *InductionDescriptor::transform(IR
return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
}
case IK_PtrInduction: {
- assert(Index->getType() == Step->getType() &&
- "Index type does not match StepValue type");
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
return B.CreateGEP(nullptr, StartValue, Index);
}
+ case IK_FpInduction: {
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+
+ Value *MulExp = B.CreateFMul(StepValue, Index);
+ if (isa<Instruction>(MulExp))
+ // We have to check, the MulExp may be a constant.
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags);
+
+ Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
+ MulExp, "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+
+ return BOp;
+ }
case IK_NoInduction:
return nullptr;
}
llvm_unreachable("invalid enum");
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+ // The addend should be loop invariant
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+ // FP Step has unknown SCEV
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
PredicatedScalarEvolution &PSE,
InductionDescriptor &D,
bool Assume) {
Type *PhiTy = Phi->getType();
- // We only handle integer and pointer inductions variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+
+ // Handle integer and pointer induction variables.
+ // FP inductions are handled as well, but without trying to build a
+ // recurrence expression from the PHI node; see isFPInductionPHI.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
+ !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
return false;
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
const SCEV *PhiScev = PSE.getSCEV(Phi);
const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
@@ -752,10 +846,10 @@ bool InductionDescriptor::isInductionPHI
return false;
}
- return isInductionPHI(Phi, PSE.getSE(), D, AR);
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
ScalarEvolution *SE,
InductionDescriptor &D,
const SCEV *Expr) {
@@ -773,7 +867,7 @@ bool InductionDescriptor::isInductionPHI
return false;
}
- assert(AR->getLoop()->getHeader() == Phi->getParent() &&
+ assert(TheLoop->getHeader() == Phi->getParent() &&
"PHI is an AddRec for a different loop?!");
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
@@ -781,7 +875,7 @@ bool InductionDescriptor::isInductionPHI
// Calculate the pointer stride and check if it is consecutive.
// The stride may be a constant or a loop invariant integer value.
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
- if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
return false;
if (PhiTy->isIntegerTy()) {
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=276554&r1=276553&r2=276554&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Sun Jul 24 02:24:54 2016
@@ -402,7 +402,10 @@ protected:
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
- virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
+ /// \p Opcode is relevant for FP induction variable.
+ virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode =
+ Instruction::BinaryOpsEnd);
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
@@ -625,7 +628,9 @@ private:
bool IfPredicateStore = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
- Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
+ Value *getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode =
+ Instruction::BinaryOpsEnd) override;
Value *reverseVector(Value *Vec) override;
};
@@ -2000,32 +2005,60 @@ void InnerLoopVectorizer::widenIntInduct
}
}
-Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
- Value *Step) {
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps BinOp) {
+ // Create and check the types.
assert(Val->getType()->isVectorTy() && "Must be a vector");
- assert(Val->getType()->getScalarType()->isIntegerTy() &&
- "Elem must be an integer");
- assert(Step->getType() == Val->getType()->getScalarType() &&
- "Step has wrong type");
- // Create the types.
- Type *ITy = Val->getType()->getScalarType();
- VectorType *Ty = cast<VectorType>(Val->getType());
- int VLen = Ty->getNumElements();
+ int VLen = Val->getType()->getVectorNumElements();
+
+ Type *STy = Val->getType()->getScalarType();
+ assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
+ "Induction Step must be an integer or FP");
+ assert(Step->getType() == STy && "Step has wrong type");
+
SmallVector<Constant *, 8> Indices;
+ if (STy->isIntegerTy()) {
+ // Create a vector of consecutive numbers from zero to VF.
+ for (int i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(STy, StartIdx + i));
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ assert(Step->getType() == Val->getType() && "Invalid step vec");
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
+ Step = Builder.CreateMul(Cv, Step);
+ return Builder.CreateAdd(Val, Step, "induction");
+ }
+
+ // Floating point induction.
+ assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
+ "Binary Opcode should be specified for FP induction");
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i)
- Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
+ Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
- assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+
Step = Builder.CreateVectorSplat(VLen, Step);
- assert(Step->getType() == Val->getType() && "Invalid step vec");
- // FIXME: The newly created binary instructions should contain nsw/nuw flags,
- // which can be found from the original scalar operations.
- Step = Builder.CreateMul(Cv, Step);
- return Builder.CreateAdd(Val, Step, "induction");
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+
+ Value *MulOp = Builder.CreateFMul(Cv, Step);
+ if (isa<Instruction>(MulOp))
+ // Have to check, MulOp may be a constant
+ cast<Instruction>(MulOp)->setFastMathFlags(Flags);
+
+ Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+ return BOp;
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -3099,8 +3132,10 @@ void InnerLoopVectorizer::createEmptyLoo
EndValue = CountRoundDown;
} else {
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
- Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
- II.getStep()->getType(), "cast.crd");
+ Type *StepType = II.getStep()->getType();
+ Instruction::CastOps CastOp =
+ CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
+ Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
EndValue = II.transform(B, CRD, PSE.getSE(), DL);
EndValue->setName("ind.end");
@@ -4047,7 +4082,7 @@ void InnerLoopVectorizer::widenPHIInstru
llvm_unreachable("Unknown induction");
case InductionDescriptor::IK_IntInduction:
return widenIntInduction(P, Entry);
- case InductionDescriptor::IK_PtrInduction:
+ case InductionDescriptor::IK_PtrInduction: {
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
@@ -4080,6 +4115,29 @@ void InnerLoopVectorizer::widenPHIInstru
}
return;
}
+ case InductionDescriptor::IK_FpInduction: {
+ assert(P->getType() == II.getStartValue()->getType() &&
+ "Types must match");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ assert(P != OldInduction && "Primary induction can be integer only");
+
+ Value *V = Builder.CreateCast(Instruction::SIToFP, Induction, P->getType());
+ V = II.transform(Builder, V, PSE.getSE(), DL);
+ V->setName("fp.offset.idx");
+
+ // Now we have scalar op: %fp.offset.idx = StartVal +/- Induction*StepVal
+
+ Value *Broadcasted = getBroadcastInstrs(V);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc.
+ Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue();
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getStepVector(Broadcasted, VF * part, StepVal,
+ II.getInductionOpcode());
+ return;
+ }
+ }
}
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
@@ -4565,10 +4623,12 @@ void LoopVectorizationLegality::addInduc
const DataLayout &DL = Phi->getModule()->getDataLayout();
// Get the widest type.
- if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
- else
- WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+ if (!PhiTy->isFloatingPointTy()) {
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+ }
// Int inductions are special because we only allow one IV.
if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
@@ -4649,8 +4709,10 @@ bool LoopVectorizationLegality::canVecto
}
InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
+ if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
+ Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
continue;
}
@@ -4661,7 +4723,7 @@ bool LoopVectorizationLegality::canVecto
// As a last resort, coerce the PHI to a AddRec expression
// and re-try classifying it a an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}
@@ -6348,11 +6410,20 @@ Value *InnerLoopUnroller::reverseVector(
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
-Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps BinOp) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
- Type *ITy = Val->getType();
- assert(!ITy->isVectorTy() && "Val must be a scalar");
- Constant *C = ConstantInt::get(ITy, StartIdx);
+ Type *Ty = Val->getType();
+ assert(!Ty->isVectorTy() && "Val must be a scalar");
+
+ if (Ty->isFloatingPointTy()) {
+ Constant *C = ConstantFP::get(Ty, (double)StartIdx);
+
+ // Floating point operations had to be 'fast' to enable the unrolling.
+ Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step));
+ return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp));
+ }
+ Constant *C = ConstantInt::get(Ty, StartIdx);
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
}
Added: llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll?rev=276554&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/float-induction-x86.ll Sun Jul 24 02:24:54 2016
@@ -0,0 +1,86 @@
+; RUN: opt < %s -O3 -mcpu=core-avx2 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefix AUTO_VEC %s
+
+; This test checks auto-vectorization with FP induction variable.
+; The FP operation is not "fast"; the function has the "no-nans-fp-math"="true" attribute instead.
+
+;void fp_iv_loop1(float * __restrict__ A, int N) {
+; float x = 1.0;
+; for (int i=0; i < N; ++i) {
+; A[i] = x;
+; x += 0.5;
+; }
+;}
+
+
+; AUTO_VEC-LABEL: @fp_iv_loop1(
+; AUTO_VEC: vector.body
+; AUTO_VEC: store <8 x float>
+
+define void @fp_iv_loop1(float* noalias nocapture %A, i32 %N) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.06, float* %arrayidx, align 4
+ %conv1 = fadd float %x.06, 5.000000e-01
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Same loop as above: the FP operation is not "fast", but here "no-nans-fp-math"="false".
+; Vectorization should be rejected.
+;void fp_iv_loop2(float * __restrict__ A, int N) {
+; float x = 1.0;
+; for (int i=0; i < N; ++i) {
+; A[i] = x;
+; x += 0.5;
+; }
+;}
+
+; AUTO_VEC-LABEL: @fp_iv_loop2(
+; AUTO_VEC-NOT: vector.body
+; AUTO_VEC-NOT: store <{{.*}} x float>
+
+define void @fp_iv_loop2(float* noalias nocapture %A, i32 %N) #1 {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.06, float* %arrayidx, align 4
+ %conv1 = fadd float %x.06, 5.000000e-01
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+attributes #0 = { "no-nans-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="false" }
Added: llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll?rev=276554&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/float-induction.ll Sun Jul 24 02:24:54 2016
@@ -0,0 +1,218 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
+
+; VEC4_INTERL1-LABEL: @fp_iv_loop1(
+; VEC4_INTERL1: %[[FP_INC:.*]] = load float, float* @fp_inc
+; VEC4_INTERL1: vector.body:
+; VEC4_INTERL1: %[[FP_INDEX:.*]] = sitofp i64 {{.*}} to float
+; VEC4_INTERL1: %[[VEC_INCR:.*]] = fmul fast float {{.*}}, %[[FP_INDEX]]
+; VEC4_INTERL1: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[VEC_INCR]]
+; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement <4 x float> undef, float %[[FP_OFFSET_IDX]], i32 0
+; VEC4_INTERL1-NEXT: %[[BRCT_SPLAT:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
+; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement {{.*}} %[[FP_INC]]
+; VEC4_INTERL1-NEXT: %[[FP_INC_BCST:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], {{.*}} zeroinitializer
+; VEC4_INTERL1: %[[VSTEP:.*]] = fmul fast <4 x float> %[[FP_INC_BCST]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL1-NEXT: %[[VEC_INDUCTION:.*]] = fsub fast <4 x float> %[[BRCT_SPLAT]], %[[VSTEP]]
+; VEC4_INTERL1: store <4 x float> %[[VEC_INDUCTION]]
+
+; VEC4_INTERL2-LABEL: @fp_iv_loop1(
+; VEC4_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
+; VEC4_INTERL2: vector.body:
+; VEC4_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
+; VEC4_INTERL2: %[[VEC_INCR:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
+; VEC4_INTERL2: fsub fast float %init, %[[VEC_INCR]]
+; VEC4_INTERL2: %[[VSTEP1:.*]] = fmul fast <4 x float> %{{.*}}, <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION1:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP1]]
+; VEC4_INTERL2: %[[VSTEP2:.*]] = fmul fast <4 x float> %{{.*}}, <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>
+; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION2:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP2]]
+; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION1]]
+; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION2]]
+
+; VEC1_INTERL2-LABEL: @fp_iv_loop1(
+; VEC1_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
+; VEC1_INTERL2: vector.body:
+; VEC1_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
+; VEC1_INTERL2: %[[STEP:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
+; VEC1_INTERL2: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[STEP]]
+; VEC1_INTERL2: %[[SCALAR_INDUCTION2:.*]] = fsub fast float %[[FP_OFFSET_IDX]], %[[FP_INC]]
+; VEC1_INTERL2: store float %[[FP_OFFSET_IDX]]
+; VEC1_INTERL2: store float %[[SCALAR_INDUCTION2]]
+
+@fp_inc = common global float 0.000000e+00, align 4
+
+;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
+; float x = init;
+; for (int i=0; i < N; ++i) {
+; A[i] = x;
+; x -= fp_inc;
+; }
+;}
+
+define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #1 {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %fpinc = load float, float* @fp_inc, align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.05, float* %arrayidx, align 4
+ %add = fsub fast float %x.05, %fpinc
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
+; float x = init;
+; for (int i=0; i < N; ++i) {
+; A[i] = x;
+; x += 0.5;
+; }
+;}
+
+; VEC4_INTERL1-LABEL: @fp_iv_loop2(
+; VEC4_INTERL1: vector.body
+; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
+; VEC4_INTERL1: sitofp i64 %[[index]] to float
+; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, 5.000000e-01
+; VEC4_INTERL1: %[[VAR2:.*]] = fadd fast float %[[VAR1]]
+; VEC4_INTERL1: insertelement <4 x float> undef, float %[[VAR2]], i32 0
+; VEC4_INTERL1: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
+; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
+; VEC4_INTERL1: store <4 x float>
+
+define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.06, float* %arrayidx, align 4
+ %conv1 = fadd fast float %x.06, 5.000000e-01
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
+; int i = 0;
+; float x = init;
+; float y = 0.1;
+; for (; i < N; ++i) {
+; A[i] = x;
+; x += fp_inc;
+; y -= 0.5;
+; B[i] = x + y;
+; C[i] = y;
+; }
+;}
+; VEC4_INTERL1-LABEL: @fp_iv_loop3(
+; VEC4_INTERL1: vector.body
+; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
+; VEC4_INTERL1: sitofp i64 %[[index]] to float
+; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, -5.000000e-01
+; VEC4_INTERL1: fadd fast float %[[VAR1]]
+; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float -5.000000e-01, float -1.000000e+00, float -1.500000e+00, float -2.000000e+00>
+; VEC4_INTERL1: store <4 x float>
+
+define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #1 {
+entry:
+ %cmp9 = icmp sgt i32 %N, 0
+ br i1 %cmp9, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load float, float* @fp_inc, align 4
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
+ %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.011, float* %arrayidx, align 4
+ %add = fadd fast float %x.011, %0
+ %conv1 = fadd fast float %y.012, -5.000000e-01
+ %add2 = fadd fast float %conv1, %add
+ %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
+ store float %add2, float* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
+ store float %conv1, float* %arrayidx6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; Start and step values are constants. There is no 'fmul' operation in this case
+;void fp_iv_loop4(float * __restrict__ A, int N) {
+; float x = 1.0;
+; for (int i=0; i < N; ++i) {
+; A[i] = x;
+; x += 0.5;
+; }
+;}
+
+; VEC4_INTERL1-LABEL: @fp_iv_loop4(
+; VEC4_INTERL1: vector.body
+; VEC4_INTERL1-NOT: fmul fast <4 x float>
+; VEC4_INTERL1: %[[induction:.*]] = fadd fast <4 x float> %{{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
+; VEC4_INTERL1: store <4 x float> %[[induction]]
+
+define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ store float %x.06, float* %arrayidx, align 4
+ %conv1 = fadd fast float %x.06, 5.000000e-01
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
More information about the llvm-commits
mailing list