[llvm] r318149 - [LV] Introduce VPBlendRecipe, VPWidenMemoryInstructionRecipe
Gil Rapaport via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 14 04:09:30 PST 2017
Author: gilr
Date: Tue Nov 14 04:09:30 2017
New Revision: 318149
URL: http://llvm.org/viewvc/llvm-project?rev=318149&view=rev
Log:
[LV] Introduce VPBlendRecipe, VPWidenMemoryInstructionRecipe
This patch is part of D38676.
The patch introduces two new Recipes to handle instructions whose vectorization
involves masking. These Recipes take VPlan-level masks in D38676, but still rely
on ILV's existing createEdgeMask(), createBlockInMask() in this patch.
VPBlendRecipe handles intra-loop phi nodes, which are vectorized as a sequence
of SELECTs. Its execute() code is refactored out of ILV::widenPHIInstruction(),
which now handles only loop-header phi nodes.
VPWidenMemoryInstructionRecipe handles load/store which are to be widened
(but are not part of an Interleave Group). In this patch it simply calls
ILV::vectorizeMemoryInstruction on execute().
Differential Revision: https://reviews.llvm.org/D39068
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/lib/Transforms/Vectorize/VPlan.h
llvm/trunk/test/Transforms/LoopVectorize/X86/x86-predication.ll
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=318149&r1=318148&r2=318149&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Tue Nov 14 04:09:30 2017
@@ -282,9 +282,12 @@ namespace {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizationRequirements;
+class VPBlendRecipe;
class VPInterleaveRecipe;
class VPReplicateRecipe;
class VPWidenIntOrFpInductionRecipe;
+class VPWidenRecipe;
+class VPWidenMemoryInstructionRecipe;
} // end anonymous namespace
@@ -452,6 +455,10 @@ public:
/// mask for the block BB.
VectorParts createBlockInMask(BasicBlock *BB);
+ /// A helper function that computes the predicate of the edge between SRC
+ /// and DST.
+ VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
@@ -504,6 +511,13 @@ public:
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
+ /// Vectorize Load and Store instructions.
+ virtual void vectorizeMemoryInstruction(Instruction *Instr);
+
+ /// \brief Set the debug location in the builder using the debug location in
+ /// the instruction.
+ void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
+
protected:
friend class LoopVectorizationPlanner;
@@ -556,17 +570,10 @@ protected:
/// represented as.
void truncateToMinimalBitwidths();
- /// A helper function that computes the predicate of the edge between SRC
- /// and DST.
- VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
-
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
- /// Vectorize Load and Store instructions,
- virtual void vectorizeMemoryInstruction(Instruction *Instr);
-
/// Create a broadcast instruction. This method generates a broadcast
/// instruction (shuffle) for loop invariant values and for the induction
/// value. If this is the induction variable then we extend it to N, N+1, ...
@@ -647,10 +654,6 @@ protected:
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
- /// \brief Set the debug location in the builder using the debug location in
- /// the instruction.
- void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
-
/// The original loop.
Loop *OrigLoop;
@@ -2295,6 +2298,11 @@ private:
/// to \p Range.End.
VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range);
+ /// Check if \p I is a memory instruction to be widened for \p Range.Start and
+ /// potentially masked.
+ VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
+ VFRange &Range);
+
/// Check if an induction recipe should be constructed for \I within the given
/// VF \p Range. If so build and return it. If not, return null. \p Range.End
/// may be decreased to ensure same decision from \p Range.Start to
@@ -2302,6 +2310,11 @@ private:
VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
VFRange &Range);
+ /// Handle non-loop phi nodes. Currently all such phi nodes are turned into
+ /// a sequence of select instructions as the vectorizer currently performs
+ /// full if-conversion.
+ VPBlendRecipe *tryToBlend(Instruction *I);
+
/// Check if \p I can be widened within the given VF \p Range. If \p I can be
/// widened for \p Range.Start, check if the last recipe of \p VPBB can be
/// extended to include \p I or else build a new VPWidenRecipe for it and
@@ -4497,77 +4510,6 @@ void InnerLoopVectorizer::sinkScalarOper
} while (Changed);
}
-InnerLoopVectorizer::VectorParts
-InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
- assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
-
- // Look for cached value.
- std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
- EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
- if (ECEntryIt != EdgeMaskCache.end())
- return ECEntryIt->second;
-
- VectorParts SrcMask = createBlockInMask(Src);
-
- // The terminator has to be a branch inst!
- BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
- assert(BI && "Unexpected terminator found");
-
- if (!BI->isConditional())
- return EdgeMaskCache[Edge] = SrcMask;
-
- VectorParts EdgeMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
- if (BI->getSuccessor(0) != Dst)
- EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
-
- if (SrcMask[Part]) // Otherwise block in-mask is all-one, no need to AND.
- EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
-
- EdgeMask[Part] = EdgeMaskPart;
- }
-
- return EdgeMaskCache[Edge] = EdgeMask;
-}
-
-InnerLoopVectorizer::VectorParts
-InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
- assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
-
- // Look for cached value.
- BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB);
- if (BCEntryIt != BlockMaskCache.end())
- return BCEntryIt->second;
-
- // All-one mask is modelled as no-mask following the convention for masked
- // load/store/gather/scatter. Initialize BlockMask to no-mask.
- VectorParts BlockMask(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = nullptr;
-
- // Loop incoming mask is all-one.
- if (OrigLoop->getHeader() == BB)
- return BlockMaskCache[BB] = BlockMask;
-
- // This is the block mask. We OR all incoming edges.
- for (auto *Predecessor : predecessors(BB)) {
- VectorParts EdgeMask = createEdgeMask(Predecessor, BB);
- if (!EdgeMask[0]) // Mask of predecessor is all-one so mask of block is too.
- return BlockMaskCache[BB] = EdgeMask;
-
- if (!BlockMask[0]) { // BlockMask has its initialized nullptr value.
- BlockMask = EdgeMask;
- continue;
- }
-
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EdgeMask[Part]);
- }
-
- return BlockMaskCache[BB] = BlockMask;
-}
-
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
unsigned VF) {
PHINode *P = cast<PHINode>(PN);
@@ -4588,43 +4530,6 @@ void InnerLoopVectorizer::widenPHIInstru
}
setDebugLocFromInst(Builder, P);
- // Check for PHI nodes that are lowered to vector selects.
- if (P->getParent() != OrigLoop->getHeader()) {
- // We know that all PHIs in non-header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = P->getNumIncomingValues();
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // ( ...)))
- VectorParts Entry(UF);
- for (unsigned In = 0; In < NumIncoming; In++) {
- VectorParts Cond =
- createEdgeMask(P->getIncomingBlock(In), P->getParent());
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *In0 = getOrCreateVectorValue(P->getIncomingValue(In), Part);
- assert((Cond[Part] || NumIncoming == 1) &&
- "Multiple predecessors with one predecessor having a full mask");
- if (In == 0)
- Entry[Part] = In0; // Initialize with the first incoming value.
- else
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Entry[Part] = Builder.CreateSelect(Cond[Part], In0, Entry[Part],
- "predphi");
- }
- }
- for (unsigned Part = 0; Part < UF; ++Part)
- VectorLoopValueMap.setVectorValue(P, Part, Entry[Part]);
- return;
- }
// This PHINode must be an induction variable.
// Make sure that we know about it.
@@ -4848,10 +4753,6 @@ void InnerLoopVectorizer::widenInstructi
break;
}
- case Instruction::Store:
- case Instruction::Load:
- vectorizeMemoryInstruction(&I);
- break;
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -4956,7 +4857,7 @@ void InnerLoopVectorizer::widenInstructi
}
default:
- // All other instructions are scalarized.
+ // This instruction is not vectorized by simple widening.
DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
llvm_unreachable("Unhandled instruction!");
} // end of switch.
@@ -7827,6 +7728,82 @@ public:
}
};
+/// A recipe for vectorizing a phi-node as a sequence of mask-based select
+/// instructions.
+class VPBlendRecipe : public VPRecipeBase {
+private:
+ PHINode *Phi;
+
+public:
+ VPBlendRecipe(PHINode *Phi) : VPRecipeBase(VPBlendSC), Phi(Phi) {}
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *V) {
+ return V->getVPRecipeID() == VPRecipeBase::VPBlendSC;
+ }
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override {
+ State.ILV->setDebugLocFromInst(State.Builder, Phi);
+ // We know that all PHIs in non-header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = Phi->getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // ( ...)))
+ InnerLoopVectorizer::VectorParts Entry(State.UF);
+ for (unsigned In = 0; In < NumIncoming; In++) {
+ InnerLoopVectorizer::VectorParts Cond =
+ State.ILV->createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *In0 =
+ State.ILV->getOrCreateVectorValue(Phi->getIncomingValue(In), Part);
+ assert((Cond[Part] || NumIncoming == 1) &&
+ "Multiple predecessors with one predecessor having a full mask");
+ if (In == 0)
+ Entry[Part] = In0; // Initialize with the first incoming value.
+ else
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Entry[Part] = State.Builder.CreateSelect(Cond[Part], In0, Entry[Part],
+ "predphi");
+ }
+ }
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ State.ValueMap.setVectorValue(Phi, Part, Entry[Part]);
+ }
+
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent) const override {
+ O << " +\n" << Indent << "\"BLEND ";
+ Phi->printAsOperand(O, false);
+ O << " =";
+ if (Phi->getNumIncomingValues() == 1) {
+ // Not a User of any mask: not really blending, this is a
+ // single-predecessor phi.
+ O << " ";
+ Phi->getIncomingValue(0)->printAsOperand(O, false);
+ } else {
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) {
+ O << " ";
+ Phi->getIncomingValue(I)->printAsOperand(O, false);
+ O << "/";
+ Phi->getIncomingBlock(I)->printAsOperand(O, false);
+ }
+ }
+ O << "\\l\"";
+
+ }
+};
+
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles.
class VPInterleaveRecipe : public VPRecipeBase {
@@ -7970,6 +7947,31 @@ public:
}
};
+/// A Recipe for widening load/store operations.
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+private:
+ Instruction &Instr;
+
+public:
+ VPWidenMemoryInstructionRecipe(Instruction &Instr)
+ : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {}
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *V) {
+ return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
+ }
+
+ /// Generate the wide load/store.
+ void execute(VPTransformState &State) override {
+ State.ILV->vectorizeMemoryInstruction(&Instr);
+ }
+
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent) const override {
+ O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
+ O << "\\l\"";
+ }
+};
} // end anonymous namespace
bool LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -7999,6 +8001,77 @@ void LoopVectorizationPlanner::buildVPla
}
}
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+ assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
+
+ // Look for cached value.
+ std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
+ EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
+ if (ECEntryIt != EdgeMaskCache.end())
+ return ECEntryIt->second;
+
+ VectorParts SrcMask = createBlockInMask(Src);
+
+ // The terminator has to be a branch inst!
+ BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+ assert(BI && "Unexpected terminator found");
+
+ if (!BI->isConditional())
+ return EdgeMaskCache[Edge] = SrcMask;
+
+ VectorParts EdgeMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ auto *EdgeMaskPart = getOrCreateVectorValue(BI->getCondition(), Part);
+ if (BI->getSuccessor(0) != Dst)
+ EdgeMaskPart = Builder.CreateNot(EdgeMaskPart);
+
+ if (SrcMask[Part]) // Otherwise block in-mask is all-one, no need to AND.
+ EdgeMaskPart = Builder.CreateAnd(EdgeMaskPart, SrcMask[Part]);
+
+ EdgeMask[Part] = EdgeMaskPart;
+ }
+
+ return EdgeMaskCache[Edge] = EdgeMask;
+}
+
+InnerLoopVectorizer::VectorParts
+InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+ assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+ // Look for cached value.
+ BlockMaskCacheTy::iterator BCEntryIt = BlockMaskCache.find(BB);
+ if (BCEntryIt != BlockMaskCache.end())
+ return BCEntryIt->second;
+
+ // All-one mask is modelled as no-mask following the convention for masked
+ // load/store/gather/scatter. Initialize BlockMask to no-mask.
+ VectorParts BlockMask(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = nullptr;
+
+ // Loop incoming mask is all-one.
+ if (OrigLoop->getHeader() == BB)
+ return BlockMaskCache[BB] = BlockMask;
+
+ // This is the block mask. We OR all incoming edges.
+ for (auto *Predecessor : predecessors(BB)) {
+ VectorParts EdgeMask = createEdgeMask(Predecessor, BB);
+ if (!EdgeMask[0]) // Mask of predecessor is all-one so mask of block is too.
+ return BlockMaskCache[BB] = EdgeMask;
+
+ if (!BlockMask[0]) { // BlockMask has its initialized nullptr value.
+ BlockMask = EdgeMask;
+ continue;
+ }
+
+ for (unsigned Part = 0; Part < UF; ++Part)
+ BlockMask[Part] = Builder.CreateOr(BlockMask[Part], EdgeMask[Part]);
+ }
+
+ return BlockMaskCache[BB] = BlockMask;
+}
+
VPInterleaveRecipe *
LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I,
VFRange &Range) {
@@ -8026,6 +8099,32 @@ LoopVectorizationPlanner::tryToInterleav
return new VPInterleaveRecipe(IG);
}
+VPWidenMemoryInstructionRecipe *
+LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range) {
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ return nullptr;
+
+ auto willWiden = [&](unsigned VF) -> bool {
+ if (VF == 1)
+ return false;
+ if (CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF))
+ return false;
+ LoopVectorizationCostModel::InstWidening Decision =
+ CM.getWideningDecision(I, VF);
+ assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
+ "CM decision should be taken at this point.");
+ assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
+ "Interleave memory opportunity should be caught earlier.");
+ return Decision != LoopVectorizationCostModel::CM_Scalarize;
+ };
+
+ if (!getDecisionAndClampRange(willWiden, Range))
+ return nullptr;
+
+ return new VPWidenMemoryInstructionRecipe(*I);
+}
+
VPWidenIntOrFpInductionRecipe *
LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I,
VFRange &Range) {
@@ -8060,6 +8159,14 @@ LoopVectorizationPlanner::tryToOptimizeI
return nullptr;
}
+VPBlendRecipe *LoopVectorizationPlanner::tryToBlend(Instruction *I) {
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (!Phi || Phi->getParent() == OrigLoop->getHeader())
+ return nullptr;
+
+ return new VPBlendRecipe(Phi);
+}
+
bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
VFRange &Range) {
if (Legal->isScalarWithPredication(I))
@@ -8313,11 +8420,21 @@ std::unique_ptr<VPlan> LoopVectorization
continue;
}
+ // Check if Instr is a memory operation that should be widened.
+ if ((Recipe = tryToWidenMemory(Instr, Range))) {
+ VPBB->appendRecipe(Recipe);
+ continue;
+ }
+
// Check if Instr should form some PHI recipe.
if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
VPBB->appendRecipe(Recipe);
continue;
}
+ if ((Recipe = tryToBlend(Instr))) {
+ VPBB->appendRecipe(Recipe);
+ continue;
+ }
if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
continue;
Modified: llvm/trunk/lib/Transforms/Vectorize/VPlan.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.h?rev=318149&r1=318148&r2=318149&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VPlan.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VPlan.h Tue Nov 14 04:09:30 2017
@@ -452,11 +452,13 @@ public:
/// SubclassID field of the VPRecipeBase objects. They are used for concrete
/// type identification.
using VPRecipeTy = enum {
+ VPBlendSC,
VPBranchOnMaskSC,
VPInterleaveSC,
VPPredInstPHISC,
VPReplicateSC,
VPWidenIntOrFpInductionSC,
+ VPWidenMemoryInstructionSC,
VPWidenPHISC,
VPWidenSC,
};
Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/x86-predication.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/x86-predication.ll?rev=318149&r1=318148&r2=318149&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/x86-predication.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/x86-predication.ll Tue Nov 14 04:09:30 2017
@@ -1,4 +1,5 @@
; RUN: opt < %s -mattr=avx -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -simplifycfg -S | FileCheck %s
+; RUN: opt -mcpu=skylake-avx512 -S -force-vector-width=8 -force-vector-interleave=1 -loop-vectorize < %s | FileCheck %s --check-prefix=SINK-GATHER
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -58,3 +59,40 @@ for.end:
%tmp8 = phi i32 [ %tmp7, %for.inc ]
ret i32 %tmp8
}
+
+; This test ensures that a load, which would have been widened otherwise is
+; instead scalarized if Cost-Model so decided as part of its
+; sink-scalar-operands optimization for predicated instructions.
+;
+; SINK-GATHER: vector.body:
+; SINK-GATHER: pred.udiv.if:
+; SINK-GATHER: %[[T0:.+]] = load i32, i32* %{{.*}}, align 4
+; SINK-GATHER: %{{.*}} = udiv i32 %[[T0]], %{{.*}}
+; SINK-GATHER: pred.udiv.continue:
+define i32 @scalarize_and_sink_gather(i32* %a, i1 %c, i32 %x, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
+ %r = phi i32 [ 0, %entry ], [ %tmp6, %for.inc ]
+ %i7 = mul i64 %i, 777
+ br i1 %c, label %if.then, label %for.inc
+
+if.then:
+ %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i7
+ %tmp2 = load i32, i32* %tmp0, align 4
+ %tmp4 = udiv i32 %tmp2, %x
+ br label %for.inc
+
+for.inc:
+ %tmp5 = phi i32 [ %x, %for.body ], [ %tmp4, %if.then]
+ %tmp6 = add i32 %r, %tmp5
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %tmp7 = phi i32 [ %tmp6, %for.inc ]
+ ret i32 %tmp7
+}
More information about the llvm-commits
mailing list