[llvm-commits] [llvm] r169217 - in /llvm/trunk: lib/Transforms/Vectorize/LoopVectorize.cpp test/Transforms/LoopVectorize/if-conversion.ll
Nadav Rotem
nrotem at apple.com
Mon Dec 3 22:15:12 PST 2012
Author: nadav
Date: Tue Dec 4 00:15:11 2012
New Revision: 169217
URL: http://llvm.org/viewvc/llvm-project?rev=169217&view=rev
Log:
Add the last part that is needed for vectorization of if-converted code.
Added the code that actually performs the if-conversion during vectorization.
We can now vectorize this code:
for (int i=0; i<n; ++i) {
unsigned k = 0;
if (a[i] > b[i]) <------ IF inside the loop.
k = k * 5 + 3;
a[i] = k; <---- K is a phi node that becomes vector-select.
}
Added:
llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=169217&r1=169216&r2=169217&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Dec 4 00:15:11 2012
@@ -50,6 +50,7 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -134,6 +135,9 @@
}
private:
+ /// A small list of PHINodes.
+ typedef SmallVector<PHINode*, 4> PhiVector;
+
/// Add code that checks at runtime if the accessed arrays overlap.
/// Returns the comperator value or NULL if no check is needed.
Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
@@ -142,6 +146,19 @@
void createEmptyLoop(LoopVectorizationLegality *Legal);
/// Copy and widen the instructions from the old loop.
void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+ /// A helper function that computes the predicate of the block BB, assuming
+ /// that the header block of the loop is set to True. It returns the *entry*
+ /// mask for the block BB.
+ Value *createBlockInMask(BasicBlock *BB);
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+ /// A helper function to vectorize a single BB within the innermost loop.
+ void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+ PhiVector *PV);
+
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
@@ -816,7 +833,7 @@
DL->getIntPtrType(SE->getContext());
// Find the loop boundaries.
- const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
// Get the total trip count from the count by adding 1.
@@ -838,7 +855,6 @@
OldInduction->getIncomingValueForBlock(BypassBlock):
ConstantInt::get(IdxTy, 0);
- assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
assert(BypassBlock && "Invalid loop structure");
// Generate the code that checks in runtime if arrays overlap.
@@ -1044,7 +1060,6 @@
// the cost-model.
//
//===------------------------------------------------===//
- typedef SmallVector<PHINode*, 4> PhiVector;
BasicBlock &BB = *OrigLoop->getHeader();
Constant *Zero = ConstantInt::get(
IntegerType::getInt32Ty(BB.getContext()), 0);
@@ -1059,24 +1074,220 @@
// construct the PHI.
PhiVector RdxPHIsToFix;
- // For each instruction in the old loop.
- for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
- Instruction *Inst = it;
+ // Scan the loop in a topological order to ensure that defs are vectorized
+ // before users.
+ LoopBlocksDFS DFS(OrigLoop);
+ DFS.perform(LI);
+
+ // Vectorize all of the blocks in the original loop.
+ for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+ be = DFS.endRPO(); bb != be; ++bb)
+ vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
+
+ // At this point every instruction in the original loop is widened to
+ // a vector form. We are almost done. Now, we need to fix the PHI nodes
+ // that we vectorized. The PHI nodes are currently empty because we did
+ // not want to introduce cycles. Notice that the remaining PHI nodes
+ // that we need to fix are reduction variables.
+
+ // Create the 'reduced' values for each of the induction vars.
+ // The reduced values are the vector values that we scalarize and combine
+ // after the loop is finished.
+ for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
+ it != e; ++it) {
+ PHINode *RdxPhi = *it;
+ PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
+ assert(RdxPhi && "Unable to recover vectorized PHI");
+
+ // Find the reduction variable descriptor.
+ assert(Legal->getReductionVars()->count(RdxPhi) &&
+ "Unable to find the reduction variable");
+ LoopVectorizationLegality::ReductionDescriptor RdxDesc =
+ (*Legal->getReductionVars())[RdxPhi];
+
+ // We need to generate a reduction vector from the incoming scalar.
+ // To do so, we need to generate the 'identity' vector and overide
+ // one of the elements with the incoming scalar reduction. We need
+ // to do it in the vector-loop preheader.
+ Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+
+ // This is the vector-clone of the value that leaves the loop.
+ Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ Type *VecTy = VectorExit->getType();
+
+ // Find the reduction identity variable. Zero for addition, or, xor,
+ // one for multiplication, -1 for And.
+ Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind),
+ VecTy->getScalarType());
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ Value *VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+
+ // Fix the vector-loop phi.
+ // We created the induction variable so we know that the
+ // preheader is the first entry.
+ BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VecRdxPhi->addIncoming(VectorStart, VecPreheader);
+ unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
+ VecRdxPhi->addIncoming(Val, LoopVectorBody);
+
+ // Before each round, move the insertion point right between
+ // the PHIs and the values we are going to write.
+ // This allows us to write both PHINodes and the extractelement
+ // instructions.
+ Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+
+ // This PHINode contains the vectorized reduction variable, or
+ // the initial value vector, if we bypass the vector loop.
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
+ NewPhi->addIncoming(VectorStart, LoopBypassBlock);
+ NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody);
+
+ // Extract the first scalar.
+ Value *Scalar0 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
+ // Extract and reduce the remaining vector elements.
+ for (unsigned i=1; i < VF; ++i) {
+ Value *Scalar1 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
+ switch (RdxDesc.Kind) {
+ case LoopVectorizationLegality::IntegerAdd:
+ Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerMult:
+ Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerOr:
+ Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerAnd:
+ Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
+ break;
+ case LoopVectorizationLegality::IntegerXor:
+ Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
+ break;
+ default:
+ llvm_unreachable("Unknown reduction operation");
+ }
+ }
+
+ // Now, we need to fix the users of the reduction variable
+ // inside and outside of the scalar remainder loop.
+ // We know that the loop is in LCSSA form. We need to update the
+ // PHI nodes in the exit blocks.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+
+ // All PHINodes need to have a single entry edge, or two if
+ // we already fixed them.
+ assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+
+ // We found our reduction value exit-PHI. Update it with the
+ // incoming bypass edge.
+ if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ // Add an edge coming from the bypass.
+ LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ break;
+ }
+ }// end of the LCSSA phi scan.
+
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
+ int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ }// end of for each redux variable.
+}
+
+Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+ assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+ "Invalid edge");
+
+ Value *SrcMask = createBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+ assert(BI && "Unexpected terminator found");
+
+ Value *EdgeMask = SrcMask;
+ if (BI->isConditional()) {
+ EdgeMask = getVectorValue(BI->getCondition());
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMask = Builder.CreateNot(EdgeMask);
+ }
+
+ return Builder.CreateAnd(EdgeMask, SrcMask);
+}
+
+Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+ assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
- switch (Inst->getOpcode()) {
+ // Loop incoming mask is all-one.
+ if (OrigLoop->getHeader() == BB)
+ return getVectorValue(
+ ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1));
+
+ // This is the block mask. We OR all incoming edges, and with zero.
+ Value *BlockMask = getVectorValue(
+ ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0));
+
+ // For each pred:
+ for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it)
+ BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB));
+
+ return BlockMask;
+}
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+ BasicBlock *BB, PhiVector *PV) {
+ Constant *Zero =
+ ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ switch (it->getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions.
continue;
case Instruction::PHI:{
- PHINode* P = cast<PHINode>(Inst);
+ PHINode* P = cast<PHINode>(it);
// Handle reduction variables:
if (Legal->getReductionVars()->count(P)) {
// This is phase one of vectorizing PHIs.
- Type *VecTy = VectorType::get(Inst->getType(), VF);
- WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
- LoopVectorBody->getFirstInsertionPt());
- RdxPHIsToFix.push_back(P);
+ Type *VecTy = VectorType::get(it->getType(), VF);
+ WidenMap[it] =
+ PHINode::Create(VecTy, 2, "vec.phi",
+ LoopVectorBody->getFirstInsertionPt());
+ PV->push_back(P);
+ continue;
+ }
+
+ // Check for PHI nodes that are lowered to vector selects.
+ if (P->getParent() != OrigLoop->getHeader()) {
+ // We know that all PHIs in non header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+ Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+ WidenMap[P] =
+ Builder.CreateSelect(Cond,
+ getVectorValue(P->getIncomingValue(0)),
+ getVectorValue(P->getIncomingValue(1)),
+ "predphi");
continue;
}
@@ -1099,8 +1310,8 @@
// Handle pointer inductions.
assert(P->getType()->isPointerTy() && "Unexpected type.");
Value *StartIdx = OldInduction ?
- Legal->getInductionVars()->lookup(OldInduction) :
- ConstantInt::get(Induction->getType(), 0);
+ Legal->getInductionVars()->lookup(OldInduction) :
+ ConstantInt::get(Induction->getType(), 0);
// This is the pointer value coming into the loop.
Value *StartPtr = Legal->getInductionVars()->lookup(P);
@@ -1121,7 +1332,7 @@
"insert.gep");
}
- WidenMap[Inst] = VecVal;
+ WidenMap[it] = VecVal;
continue;
}
case Instruction::Add:
@@ -1143,13 +1354,13 @@
case Instruction::Or:
case Instruction::Xor: {
// Just widen binops.
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+ Value *A = getVectorValue(it->getOperand(0));
+ Value *B = getVectorValue(it->getOperand(1));
// Use this vector value for all users of the original instruction.
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
- WidenMap[Inst] = V;
+ WidenMap[it] = V;
// Update the NSW, NUW and Exact flags.
BinaryOperator *VecOp = cast<BinaryOperator>(V);
@@ -1165,7 +1376,7 @@
// Widen selects.
// If the selector is loop invariant we can create a select
// instruction with a scalar condition. Otherwise, use vector-select.
- Value *Cond = Inst->getOperand(0);
+ Value *Cond = it->getOperand(0);
bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
// The condition can be loop invariant but still defined inside the
@@ -1176,29 +1387,29 @@
if (InvariantCond)
Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
- Value *Op0 = getVectorValue(Inst->getOperand(1));
- Value *Op1 = getVectorValue(Inst->getOperand(2));
- WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
+ Value *Op0 = getVectorValue(it->getOperand(1));
+ Value *Op1 = getVectorValue(it->getOperand(2));
+ WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1);
break;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
- bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
- CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
- Value *B = getVectorValue(Inst->getOperand(1));
+ bool FCmp = (it->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(it);
+ Value *A = getVectorValue(it->getOperand(0));
+ Value *B = getVectorValue(it->getOperand(1));
if (FCmp)
- WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
else
- WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
break;
}
case Instruction::Store: {
// Attempt to issue a wide store.
- StoreInst *SI = dyn_cast<StoreInst>(Inst);
+ StoreInst *SI = dyn_cast<StoreInst>(it);
Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
Value *Ptr = SI->getPointerOperand();
unsigned Alignment = SI->getAlignment();
@@ -1210,7 +1421,7 @@
// This store does not use GEPs.
if (!Legal->isConsecutivePtr(Ptr)) {
- scalarizeInstruction(Inst);
+ scalarizeInstruction(it);
break;
}
@@ -1237,7 +1448,7 @@
}
case Instruction::Load: {
// Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ LoadInst *LI = dyn_cast<LoadInst>(it);
Type *RetTy = VectorType::get(LI->getType(), VF);
Value *Ptr = LI->getPointerOperand();
unsigned Alignment = LI->getAlignment();
@@ -1247,7 +1458,7 @@
// scalarize the load.
bool Con = Legal->isConsecutivePtr(Ptr);
if (Legal->isUniform(Ptr) || !Con) {
- scalarizeInstruction(Inst);
+ scalarizeInstruction(it);
break;
}
@@ -1272,7 +1483,7 @@
LI = Builder.CreateLoad(Ptr);
LI->setAlignment(Alignment);
// Use this vector value for all users of the load.
- WidenMap[Inst] = LI;
+ WidenMap[it] = LI;
break;
}
case Instruction::ZExt:
@@ -1288,144 +1499,22 @@
case Instruction::FPTrunc:
case Instruction::BitCast: {
/// Vectorize bitcasts.
- CastInst *CI = dyn_cast<CastInst>(Inst);
- Value *A = getVectorValue(Inst->getOperand(0));
+ CastInst *CI = dyn_cast<CastInst>(it);
+ Value *A = getVectorValue(it->getOperand(0));
Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
- WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
break;
}
-
+
default:
/// All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(Inst);
+ scalarizeInstruction(it);
break;
}// end of switch.
}// end of for_each instr.
-
- // At this point every instruction in the original loop is widended to
- // a vector form. We are almost done. Now, we need to fix the PHI nodes
- // that we vectorized. The PHI nodes are currently empty because we did
- // not want to introduce cycles. Notice that the remaining PHI nodes
- // that we need to fix are reduction variables.
-
- // Create the 'reduced' values for each of the induction vars.
- // The reduced values are the vector values that we scalarize and combine
- // after the loop is finished.
- for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
- it != e; ++it) {
- PHINode *RdxPhi = *it;
- PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
- assert(RdxPhi && "Unable to recover vectorized PHI");
-
- // Find the reduction variable descriptor.
- assert(Legal->getReductionVars()->count(RdxPhi) &&
- "Unable to find the reduction variable");
- LoopVectorizationLegality::ReductionDescriptor RdxDesc =
- (*Legal->getReductionVars())[RdxPhi];
-
- // We need to generate a reduction vector from the incoming scalar.
- // To do so, we need to generate the 'identity' vector and overide
- // one of the elements with the incoming scalar reduction. We need
- // to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
-
- // This is the vector-clone of the value that leaves the loop.
- Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
- Type *VecTy = VectorExit->getType();
-
- // Find the reduction identity variable. Zero for addition, or, xor,
- // one for multiplication, -1 for And.
- Constant *Identity = getUniformVector(getReductionIdentity(RdxDesc.Kind),
- VecTy->getScalarType());
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- Value *VectorStart = Builder.CreateInsertElement(Identity,
- RdxDesc.StartValue, Zero);
-
- // Fix the vector-loop phi.
- // We created the induction variable so we know that the
- // preheader is the first entry.
- BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- VecRdxPhi->addIncoming(VectorStart, VecPreheader);
- unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
- Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
- VecRdxPhi->addIncoming(Val, LoopVectorBody);
-
- // Before each round, move the insertion point right between
- // the PHIs and the values we are going to write.
- // This allows us to write both PHINodes and the extractelement
- // instructions.
- Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
-
- // This PHINode contains the vectorized reduction variable, or
- // the initial value vector, if we bypass the vector loop.
- PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
- NewPhi->addIncoming(VectorStart, LoopBypassBlock);
- NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody);
-
- // Extract the first scalar.
- Value *Scalar0 =
- Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
- // Extract and reduce the remaining vector elements.
- for (unsigned i=1; i < VF; ++i) {
- Value *Scalar1 =
- Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
- switch (RdxDesc.Kind) {
- case LoopVectorizationLegality::IntegerAdd:
- Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerMult:
- Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerOr:
- Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerAnd:
- Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
- break;
- case LoopVectorizationLegality::IntegerXor:
- Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
- break;
- default:
- llvm_unreachable("Unknown reduction operation");
- }
- }
-
- // Now, we need to fix the users of the reduction variable
- // inside and outside of the scalar remainder loop.
- // We know that the loop is in LCSSA form. We need to update the
- // PHI nodes in the exit blocks.
- for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
- PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi) continue;
-
- // All PHINodes need to have a single entry edge, or two if
- // we already fixed them.
- assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
-
- // We found our reduction value exit-PHI. Update it with the
- // incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
- // Add an edge coming from the bypass.
- LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
- break;
- }
- }// end of the LCSSA phi scan.
-
- // Fix the scalar loop reduction variable with the incoming reduction sum
- // from the vector body and from the backedge value.
- int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
- (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
- }// end of for each redux variable.
}
+
void InnerLoopVectorizer::updateAnalysis() {
// Forget the original basic block.
SE->forgetLoop(OrigLoop);
Added: llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll?rev=169217&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/if-conversion.ll Tue Dec 4 00:15:11 2012
@@ -0,0 +1,60 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is the loop in this example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+; for (int i=start; i<end; ++i) {
+; unsigned k = a[i];
+;
+; if (a[i] > b[i]) <------ notice the IF inside the loop.
+; k = k * 5 + 3;
+;
+; a[i] = k; <---- K is a phi node that becomes vector-select.
+; }
+;}
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+ %cmp16 = icmp slt i32 %start, %end
+ br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %0 = sext i32 %start to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4
+ %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx4, align 4
+ %cmp5 = icmp sgt i32 %1, %2
+ br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+ %mul = mul i32 %1, 5
+ %add = add i32 %mul, 3
+ br label %if.end
+
+if.end:
+ %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+ store i32 %k.0, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %3 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %3, %end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret i32 undef
+}
More information about the llvm-commits
mailing list