<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Wed, Nov 14, 2018 at 2:14 PM Florian Hahn via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: fhahn<br>
Date: Wed Nov 14 05:11:49 2018<br>
New Revision: 346857<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=346857&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=346857&view=rev</a><br>
Log:<br>
[VPlan, SLP] Add simple SLP analysis on top of VPlan.<br>
<br>
This patch adds an initial implementation of the look-ahead SLP tree<br>
construction described in 'Look-Ahead SLP: Auto-vectorization in the Presence<br>
of Commutative Operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,<br>
Luís F. W. Góes'.<br>
<br>
It returns an SLP tree represented as VPInstructions, with combined<br>
instructions represented as a single, wider VPInstruction.<br>
<br>
This initial version does not support instructions with multiple<br>
different users (either inside or outside the SLP tree) or<br>
non-instruction operands; it won't generate any shuffles or<br>
insertelement instructions.<br>
<br>
It also just adds the analysis that builds an SLP tree rooted in a set<br>
of stores. It does not include any cost modeling or memory legality<br>
checks. The plan is to integrate it with VPlan based cost modeling, once<br>
available and to only apply it to operations that can be widened.<br>
<br>
A follow-up patch will add support for replacing instructions in a<br>
VPlan with their SLP counterparts.<br>
<br>
Reviewers: Ayal, mssimpso, rengolin, mkuper, hfinkel, hsaito, dcaballe, vporpo, RKSimon, ABataev<br>
<br>
Reviewed By: rengolin<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D49491" rel="noreferrer" target="_blank">https://reviews.llvm.org/D49491</a><br>
<br>
Added:<br>
llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp<br>
llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp<br>
Modified:<br>
llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt<br>
llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp<br>
llvm/trunk/lib/Transforms/Vectorize/VPlan.h<br>
llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h<br>
llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt Wed Nov 14 05:11:49 2018<br>
@@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize<br>
VPlan.cpp<br>
VPlanHCFGBuilder.cpp<br>
VPlanHCFGTransforms.cpp<br>
+ VPlanSLP.cpp<br>
VPlanVerifier.cpp<br>
<br>
ADDITIONAL_HEADER_DIRS<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp?rev=346857&r1=346856&r2=346857&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp?rev=346857&r1=346856&r2=346857&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp Wed Nov 14 05:11:49 2018<br>
@@ -338,6 +338,12 @@ void VPInstruction::print(raw_ostream &O<br>
case VPInstruction::ICmpULE:<br>
O << "icmp ule";<br>
break;<br>
+ case VPInstruction::SLPLoad:<br>
+ O << "combined load";<br>
+ break;<br>
+ case VPInstruction::SLPStore:<br>
+ O << "combined store";<br>
+ break;<br>
default:<br>
O << Instruction::getOpcodeName(getOpcode());<br>
}<br>
@@ -681,6 +687,13 @@ void VPWidenMemoryInstructionRecipe::pri<br>
<br>
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);<br>
<br>
+void VPValue::replaceAllUsesWith(VPValue *New) {<br>
+ for (VPUser *User : users())<br>
+ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)<br>
+ if (User->getOperand(I) == this)<br>
+ User->setOperand(I, New);<br>
+}<br>
+<br>
void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,<br>
Old2NewTy &Old2New,<br>
InterleavedAccessInfo &IAI) {<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/VPlan.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.h?rev=346857&r1=346856&r2=346857&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.h?rev=346857&r1=346856&r2=346857&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VPlan.h (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VPlan.h Wed Nov 14 05:11:49 2018<br>
@@ -60,6 +60,7 @@ class Value;<br>
class VPBasicBlock;<br>
class VPRegionBlock;<br>
class VPlan;<br>
+class VPlanSlp;<br>
<br>
/// A range of powers-of-2 vectorization factors with fixed start and<br>
/// adjustable end. The range includes start and excludes end, e.g.,:<br>
@@ -609,10 +610,16 @@ public:<br>
/// the VPInstruction is also a single def-use vertex.<br>
class VPInstruction : public VPUser, public VPRecipeBase {<br>
friend class VPlanHCFGTransforms;<br>
+ friend class VPlanSlp;<br>
<br>
public:<br>
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.<br>
- enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };<br>
+ enum {<br>
+ Not = Instruction::OtherOpsEnd + 1,<br>
+ ICmpULE,<br>
+ SLPLoad,<br>
+ SLPStore,<br>
+ };<br>
<br>
private:<br>
typedef unsigned char OpcodeTy;<br>
@@ -622,6 +629,13 @@ private:<br>
/// modeled instruction.<br>
void generateInstruction(VPTransformState &State, unsigned Part);<br>
<br>
+protected:<br>
+ Instruction *getUnderlyingInstr() {<br>
+ return cast_or_null<Instruction>(getUnderlyingValue());<br>
+ }<br>
+<br>
+ void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }<br>
+<br>
public:<br>
VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)<br>
: VPUser(VPValue::VPInstructionSC, Operands),<br>
@@ -635,6 +649,11 @@ public:<br>
return V->getVPValueID() == VPValue::VPInstructionSC;<br>
}<br>
<br>
+ VPInstruction *clone() const {<br>
+ SmallVector<VPValue *, 2> Operands(operands());<br>
+ return new VPInstruction(Opcode, Operands);<br>
+ }<br>
+<br>
/// Method to support type inquiry through isa, cast, and dyn_cast.<br>
static inline bool classof(const VPRecipeBase *R) {<br>
return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;<br>
@@ -652,6 +671,14 @@ public:<br>
<br>
/// Print the VPInstruction.<br>
void print(raw_ostream &O) const;<br>
+<br>
+ /// Return true if this instruction may modify memory.<br>
+ bool mayWriteToMemory() const {<br>
+ // TODO: we can use attributes of the called function to rule out memory<br>
+ // modifications.<br>
+ return Opcode == Instruction::Store || Opcode == Instruction::Call ||<br>
+ Opcode == Instruction::Invoke || Opcode == SLPStore;<br>
+ }<br>
};<br>
<br>
/// VPWidenRecipe is a recipe for producing a copy of vector type for each<br>
@@ -1508,6 +1535,102 @@ public:<br>
}<br>
};<br>
<br>
+/// Class that maps (parts of) an existing VPlan to trees of combined<br>
+/// VPInstructions.<br>
+class VPlanSlp {<br>
+private:<br>
+ enum class OpMode { Failed, Load, Opcode };<br>
+<br>
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as<br>
+ /// DenseMap keys.<br>
+ struct BundleDenseMapInfo {<br>
+ static SmallVector<VPValue *, 4> getEmptyKey() {<br>
+ return {reinterpret_cast<VPValue *>(-1)};<br>
+ }<br>
+<br>
+ static SmallVector<VPValue *, 4> getTombstoneKey() {<br>
+ return {reinterpret_cast<VPValue *>(-2)};<br>
+ }<br>
+<br>
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {<br>
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));<br>
+ }<br>
+<br>
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,<br>
+ const SmallVector<VPValue *, 4> &RHS) {<br>
+ return LHS == RHS;<br>
+ }<br>
+ };<br>
+<br>
+ /// Mapping of values in the original VPlan to a combined VPInstruction.<br>
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo><br>
+ BundleToCombined;<br>
+<br>
+ VPInterleavedAccessInfo &IAI;<br>
+<br>
+ /// Basic block to operate on. For now, only instructions in a single BB are<br>
+ /// considered.<br>
+ const VPBasicBlock &BB;<br>
+<br>
+ /// Indicates whether we managed to combine all visited instructions or not.<br>
+ bool CompletelySLP = true;<br>
+<br>
+ /// Width of the widest combined bundle in bits.<br>
+ unsigned WidestBundleBits = 0;<br>
+<br>
+ using MultiNodeOpTy =<br>
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;<br>
+<br>
+ // Input operand bundles for the current multi node. Each multi node operand<br>
+ // bundle contains values not matching the multi node's opcode. They will<br>
+ // be reordered in reorderMultiNodeOps, once we completed building a<br>
+ // multi node.<br>
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;<br>
+<br>
+ /// Indicates whether we are building a multi node currently.<br>
+ bool MultiNodeActive = false;<br>
+<br>
+ /// Check if we can vectorize Operands together.<br>
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;<br>
+<br>
+ /// Add combined instruction \p New for the bundle \p Operands.<br>
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);<br>
+<br>
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.<br>
+ VPInstruction *markFailed();<br>
+<br>
+ /// Reorder operands in the multi node to maximize sequential memory access<br>
+ /// and commutative operations.<br>
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();<br>
+<br>
+ /// Choose the best candidate to use for the lane after \p Last. The set of<br>
+ /// candidates to choose from are values with an opcode matching \p Last's<br>
+ /// or loads consecutive to \p Last.<br>
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,<br>
+ SmallVectorImpl<VPValue *> &Candidates,<br>
+ VPInterleavedAccessInfo &IAI);<br>
+<br>
+ /// Print bundle \p Values to dbgs().<br>
+ void dumpBundle(ArrayRef<VPValue *> Values);<br>
+<br>
+public:<br>
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}<br>
+<br>
+ ~VPlanSlp() {<br>
+ for (auto &KV : BundleToCombined)<br>
+ delete KV.second;<br>
+ }<br>
+<br>
+ /// Tries to build an SLP tree rooted at \p Operands and returns a<br>
+ /// VPInstruction combining \p Operands, if they can be combined.<br>
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);<br>
+<br>
+ /// Return the width of the widest combined bundle in bits.<br>
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }<br>
+<br>
+ /// Return true if all visited instruction can be combined.<br>
+ bool isCompletelySLP() const { return CompletelySLP; }<br>
+};<br>
} // end namespace llvm<br>
<br>
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H<br>
<br>
Added: llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp?rev=346857&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp?rev=346857&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp (added)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp Wed Nov 14 05:11:49 2018<br>
@@ -0,0 +1,469 @@<br>
+//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===----------------------------------------------------------------------===//<br>
+/// This file implements SLP analysis based on VPlan. The analysis is based on<br>
+/// the ideas described in<br>
+///<br>
+/// Look-ahead SLP: auto-vectorization in the presence of commutative<br>
+/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,<br>
+/// Luís F. W. Góes<br>
+///<br>
+//===----------------------------------------------------------------------===//<br>
+<br>
+#include "VPlan.h"<br>
+#include "llvm/ADT/DepthFirstIterator.h"<br>
+#include "llvm/ADT/PostOrderIterator.h"<br>
+#include "llvm/ADT/SmallVector.h"<br>
+#include "llvm/ADT/Twine.h"<br>
+#include "llvm/Analysis/LoopInfo.h"<br>
+#include "llvm/Analysis/VectorUtils.h"<br>
+#include "llvm/IR/BasicBlock.h"<br>
+#include "llvm/IR/CFG.h"<br>
+#include "llvm/IR/Dominators.h"<br>
+#include "llvm/IR/InstrTypes.h"<br>
+#include "llvm/IR/Instruction.h"<br>
+#include "llvm/IR/Instructions.h"<br>
+#include "llvm/IR/Type.h"<br>
+#include "llvm/IR/Value.h"<br>
+#include "llvm/Support/Casting.h"<br>
+#include "llvm/Support/Debug.h"<br>
+#include "llvm/Support/ErrorHandling.h"<br>
+#include "llvm/Support/GraphWriter.h"<br>
+#include "llvm/Support/raw_ostream.h"<br>
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"<br>
+#include <cassert><br>
+#include <iterator><br>
+#include <string><br>
+#include <vector><br>
+<br>
+using namespace llvm;<br>
+<br>
+#define DEBUG_TYPE "vplan-slp"<br>
+<br>
+// Number of levels to look ahead when re-ordering multi node operands.<br>
+static unsigned LookaheadMaxDepth = 5;<br>
+<br>
+VPInstruction *VPlanSlp::markFailed() {<br>
+ // FIXME: Currently this is used to signal we hit instructions we cannot<br>
+ // trivially SLP'ize.<br>
+ CompletelySLP = false;<br>
+ return nullptr;<br>
+}<br>
+<br>
+void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {<br>
+ if (all_of(Operands, [](VPValue *V) {<br>
+ return cast<VPInstruction>(V)->getUnderlyingInstr();<br>
+ })) {<br>
+ unsigned BundleSize = 0;<br>
+ for (VPValue *V : Operands) {<br>
+ Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();<br>
+ assert(!T->isVectorTy() && "Only scalar types supported for now");<br>
+ BundleSize += T->getScalarSizeInBits();<br>
+ }<br>
+ WidestBundleBits = std::max(WidestBundleBits, BundleSize);<br>
+ }<br>
+<br>
+ auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);<br>
+ assert(Res.second &&<br>
+ "Already created a combined instruction for the operand bundle");<br>
+ (void)Res;<br>
+}<br>
+<br>
+bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {<br>
+ // Currently we only support VPInstructions.<br>
+ if (!all_of(Operands, [](VPValue *Op) {<br>
+ return Op && isa<VPInstruction>(Op) &&<br>
+ cast<VPInstruction>(Op)->getUnderlyingInstr();<br>
+ })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");<br>
+ return false;<br>
+ }<br>
+<br>
+ // Check if opcodes and type width agree for all instructions in the bundle.<br>
+ // FIXME: Differing widths/opcodes can be handled by inserting additional<br>
+ // instructions.<br>
+ // FIXME: Deal with non-primitive types.<br>
+ const Instruction *OriginalInstr =<br>
+ cast<VPInstruction>(Operands[0])->getUnderlyingInstr();<br>
+ unsigned Opcode = OriginalInstr->getOpcode();<br>
+ unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();<br>
+ if (!all_of(Operands, [Opcode, Width](VPValue *Op) {<br>
+ const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();<br>
+ return I->getOpcode() == Opcode &&<br>
+ I->getType()->getPrimitiveSizeInBits() == Width;<br>
+ })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");<br>
+ return false;<br>
+ }<br>
+<br>
+ // For now, all operands must be defined in the same BB.<br>
+ if (any_of(Operands, [this](VPValue *Op) {<br>
+ return cast<VPInstruction>(Op)->getParent() != &this->BB;<br>
+ })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");<br>
+ return false;<br>
+ }<br>
+<br>
+ if (any_of(Operands,<br>
+ [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");<br>
+ return false;<br>
+ }<br>
+<br>
+ // For loads, check that there are no instructions writing to memory in<br>
+ // between them.<br>
+ // TODO: we only have to forbid instructions writing to memory that could<br>
+ // interfere with any of the loads in the bundle<br>
+ if (Opcode == Instruction::Load) {<br>
+ unsigned LoadsSeen = 0;<br>
+ VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();<br>
+ for (auto &I : *Parent) {<br>
+ auto *VPI = cast<VPInstruction>(&I);<br>
+ if (VPI->getOpcode() == Instruction::Load &&<br>
+ std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())<br>
+ LoadsSeen++;<br>
+<br>
+ if (LoadsSeen == Operands.size())<br>
+ break;<br>
+ if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {<br>
+ LLVM_DEBUG(<br>
+ dbgs() << "VPSLP: instruction modifying memory between loads\n");<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
+ if (!all_of(Operands, [](VPValue *Op) {<br>
+ return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())<br>
+ ->isSimple();<br>
+ })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");<br>
+ return false;<br>
+ }<br>
+ }<br>
+<br>
+ if (Opcode == Instruction::Store)<br>
+ if (!all_of(Operands, [](VPValue *Op) {<br>
+ return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())<br>
+ ->isSimple();<br>
+ })) {<br>
+ LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");<br>
+ return false;<br>
+ }<br>
+<br>
+ return true;<br>
+}<br>
+<br>
+static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,<br>
+ unsigned OperandIndex) {<br>
+ SmallVector<VPValue *, 4> Operands;<br>
+ for (VPValue *V : Values) {<br>
+ auto *U = cast<VPUser>(V);<br>
+ Operands.push_back(U->getOperand(OperandIndex));<br>
+ }<br>
+ return Operands;<br>
+}<br>
+<br>
+static bool areCommutative(ArrayRef<VPValue *> Values) {<br>
+ return Instruction::isCommutative(<br>
+ cast<VPInstruction>(Values[0])->getOpcode());<br>
+}<br>
+<br>
+static SmallVector<SmallVector<VPValue *, 4>, 4><br>
+getOperands(ArrayRef<VPValue *> Values) {<br>
+ SmallVector<SmallVector<VPValue *, 4>, 4> Result;<br>
+ auto *VPI = cast<VPInstruction>(Values[0]);<br>
+<br>
+ switch (VPI->getOpcode()) {<br>
+ case Instruction::Load:<br>
+ llvm_unreachable("Loads terminate a tree, no need to get operands");<br>
+ case Instruction::Store:<br>
+ Result.push_back(getOperands(Values, 0));<br>
+ break;<br>
+ default:<br>
+ for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)<br>
+ Result.push_back(getOperands(Values, I));<br>
+ break;<br>
+ }<br>
+<br>
+ return Result;<br>
+}<br>
+<br>
+/// Returns the opcode of Values or None if they do not all agree.<br>
+static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {<br>
+ unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();<br>
+ if (any_of(Values, [Opcode](VPValue *V) {<br>
+ return cast<VPInstruction>(V)->getOpcode() != Opcode;<br>
+ }))<br>
+ return None;<br>
+ return {Opcode};<br>
+}<br>
+<br>
+/// Returns true if A and B access sequential memory if they are loads or<br>
+/// stores or if they have identical opcodes otherwise.<br>
+static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,<br>
+ VPInterleavedAccessInfo &IAI) {<br>
+ if (A->getOpcode() != B->getOpcode())<br>
+ return false;<br>
+<br>
+ if (A->getOpcode() != Instruction::Load &&<br>
+ A->getOpcode() != Instruction::Store)<br>
+ return true;<br>
+ auto *GA = IAI.getInterleaveGroup(A);<br>
+ auto *GB = IAI.getInterleaveGroup(B);<br>
+<br>
+ return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);<br>
+}<br>
+<br>
+/// Implements getLAScore from Listing 7 in the paper.<br>
+/// Traverses and compares operands of V1 and V2 to MaxLevel.<br>
+static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,<br>
+ VPInterleavedAccessInfo &IAI) {<br>
+ if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))<br>
+ return 0;<br>
+<br>
+ if (MaxLevel == 0)<br>
+ return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),<br>
+ cast<VPInstruction>(V2), IAI);<br>
+<br>
+ unsigned Score = 0;<br>
+ for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)<br>
+ for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)<br>
+ Score += getLAScore(cast<VPUser>(V1)->getOperand(I),<br>
+ cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);<br>
+ return Score;<br>
+}<br>
+<br>
+std::pair<VPlanSlp::OpMode, VPValue *><br>
+VPlanSlp::getBest(OpMode Mode, VPValue *Last,<br>
+ SmallVectorImpl<VPValue *> &Candidates,<br>
+ VPInterleavedAccessInfo &IAI) {<br>
+ LLVM_DEBUG(dbgs() << " getBest\n");<br>
+ VPValue *Best = Candidates[0];<br>
+ SmallVector<VPValue *, 4> BestCandidates;<br>
+<br>
+ LLVM_DEBUG(dbgs() << " Candidates for "<br>
+ << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");<br>
+ for (auto *Candidate : Candidates) {<br>
+ auto *LastI = cast<VPInstruction>(Last);<br>
+ auto *CandidateI = cast<VPInstruction>(Candidate);<br>
+ if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {<br>
+ LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()<br>
+ << " ");<br>
+ BestCandidates.push_back(Candidate);<br>
+ }<br>
+ }<br>
+ LLVM_DEBUG(dbgs() << "\n");<br>
+<br>
+ if (BestCandidates.empty())<br>
+ return {OpMode::Failed, nullptr};<br>
+<br>
+ if (BestCandidates.size() == 1)<br>
+ return {Mode, BestCandidates[0]};<br>
+<br>
+ if (Mode == OpMode::Opcode) {<br>
+ unsigned BestScore = 0;<br>
+ for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {<br>
+ unsigned PrevScore = ~0u;<br>
+ bool AllSame = true;<br>
+<br>
+ // FIXME: Avoid visiting the same operands multiple times.<br>
+ for (auto *Candidate : BestCandidates) {<br>
+ unsigned Score = getLAScore(Last, Candidate, Depth, IAI);<br>
+ if (PrevScore == ~0u)<br>
+ PrevScore = Score;<br>
+ if (PrevScore != Score)<br>
+ AllSame = false;<br>
+ PrevScore = Score;<br>
+<br>
+ if (Score > BestScore) {<br>
+ BestScore = Score;<br>
+ Best = Candidate;<br>
+ }<br>
+ }<br>
+ if (!AllSame)<br>
+ break;<br>
+ }<br>
+ }<br>
+ LLVM_DEBUG(dbgs() << "Found best "<br>
+ << *cast<VPInstruction>(Best)->getUnderlyingInstr()<br>
+ << "\n");<br>
+ std::remove(Candidates.begin(), Candidates.end(), Best);<br></blockquote><div><br></div><div>What's this supposed to do? Right now it moves "Best" to the end of the vector if it's there. That doesn't look like it's intentional.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ return {Mode, Best};<br>
+}<br>
+<br>
+SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {<br>
+ SmallVector<MultiNodeOpTy, 4> FinalOrder;<br>
+ SmallVector<OpMode, 4> Mode;<br>
+ FinalOrder.reserve(MultiNodeOps.size());<br>
+ Mode.reserve(MultiNodeOps.size());<br>
+<br>
+ LLVM_DEBUG(dbgs() << "Reordering multinode\n");<br>
+<br>
+ for (auto &Operands : MultiNodeOps) {<br>
+ FinalOrder.push_back({Operands.first, {Operands.second[0]}});<br>
+ if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==<br>
+ Instruction::Load)<br>
+ Mode.push_back(OpMode::Load);<br>
+ else<br>
+ Mode.push_back(OpMode::Opcode);<br>
+ }<br>
+<br>
+ for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {<br>
+ LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");<br>
+ SmallVector<VPValue *, 4> Candidates;<br>
+ Candidates.reserve(MultiNodeOps.size());<br>
+ LLVM_DEBUG(dbgs() << " Candidates ");<br>
+ for (auto Ops : MultiNodeOps) {<br>
+ LLVM_DEBUG(<br>
+ dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()<br>
+ << " ");<br>
+ Candidates.push_back(Ops.second[Lane]);<br>
+ }<br>
+ LLVM_DEBUG(dbgs() << "\n");<br>
+<br>
+ for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {<br>
+ LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");<br>
+ if (Mode[Op] == OpMode::Failed)<br>
+ continue;<br>
+<br>
+ VPValue *Last = FinalOrder[Op].second[Lane - 1];<br>
+ std::pair<OpMode, VPValue *> Res =<br>
+ getBest(Mode[Op], Last, Candidates, IAI);<br>
+ if (Res.second)<br>
+ FinalOrder[Op].second.push_back(Res.second);<br>
+ else<br>
+ // TODO: handle this case<br>
+ FinalOrder[Op].second.push_back(markFailed());<br>
+ }<br>
+ }<br>
+<br>
+ return FinalOrder;<br>
+}<br>
+<br>
+void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {<br>
+ LLVM_DEBUG(dbgs() << " Ops: ");<br>
+ for (auto Op : Values)<br>
+ if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())<br>
+ LLVM_DEBUG(dbgs() << *Instr << " | ");<br>
+ else<br>
+ LLVM_DEBUG(dbgs() << " nullptr | ");<br>
+ LLVM_DEBUG(dbgs() << "\n");<br>
+}<br>
+<br>
+VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {<br>
+ assert(!Values.empty() && "Need some operands!");<br>
+<br>
+ // If we already visited this instruction bundle, re-use the existing node<br>
+ auto I = BundleToCombined.find(to_vector<4>(Values));<br>
+ if (I != BundleToCombined.end()) {<br>
+#ifndef NDEBUG<br>
+    // Check that the resulting graph is a tree. If we re-use a node, this means<br>
+    // its values have multiple users. We only allow this, if all users of each<br>
+    // value are the same instruction.<br>
+    for (auto *V : Values) {<br>
+      auto UI = V->user_begin();<br>
+      auto *FirstUser = *UI++;<br>
+      while (UI != V->user_end()) {<br>
+        assert(*UI == FirstUser && "Currently we only support SLP trees.");<br>
+        UI++;<br>
+      }<br>
+    }<br>
+#endif<br>
+ return I->second;<br>
+ }<br>
+<br>
+ // Dump inputs<br>
+ LLVM_DEBUG({<br>
+ dbgs() << "buildGraph: ";<br>
+ dumpBundle(Values);<br>
+ });<br>
+<br>
+ if (!areVectorizable(Values))<br>
+ return markFailed();<br>
+<br>
+ assert(getOpcode(Values) && "Opcodes for all values must match");<br>
+ unsigned ValuesOpcode = getOpcode(Values).getValue();<br>
+<br>
+ SmallVector<VPValue *, 4> CombinedOperands;<br>
+ if (areCommutative(Values)) {<br>
+ bool MultiNodeRoot = !MultiNodeActive;<br>
+ MultiNodeActive = true;<br>
+ for (auto &Operands : getOperands(Values)) {<br>
+ LLVM_DEBUG({<br>
+ dbgs() << " Visiting Commutative";<br>
+ dumpBundle(Operands);<br>
+ });<br>
+<br>
+ auto OperandsOpcode = getOpcode(Operands);<br>
+ if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {<br>
+ LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");<br>
+ CombinedOperands.push_back(buildGraph(Operands));<br>
+ } else {<br>
+ LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");<br>
+ // Create dummy VPInstruction, which will we replace later by the<br>
+ // re-ordered operand.<br>
+ VPInstruction *Op = new VPInstruction(0, {});<br>
+ CombinedOperands.push_back(Op);<br>
+ MultiNodeOps.emplace_back(Op, Operands);<br>
+ }<br>
+ }<br>
+<br>
+ if (MultiNodeRoot) {<br>
+ LLVM_DEBUG(dbgs() << "Reorder \n");<br>
+ MultiNodeActive = false;<br>
+<br>
+ auto FinalOrder = reorderMultiNodeOps();<br>
+<br>
+ MultiNodeOps.clear();<br>
+ for (auto &Ops : FinalOrder) {<br>
+ VPInstruction *NewOp = buildGraph(Ops.second);<br>
+ Ops.first->replaceAllUsesWith(NewOp);<br>
+ for (unsigned i = 0; i < CombinedOperands.size(); i++)<br>
+ if (CombinedOperands[i] == Ops.first)<br>
+ CombinedOperands[i] = NewOp;<br>
+ delete Ops.first;<br>
+ Ops.first = NewOp;<br>
+ }<br>
+ LLVM_DEBUG(dbgs() << "Found final order\n");<br>
+ }<br>
+ } else {<br>
+ LLVM_DEBUG(dbgs() << " NonCommuntative\n");<br>
+ if (ValuesOpcode == Instruction::Load)<br>
+ for (VPValue *V : Values)<br>
+ CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));<br>
+ else<br>
+ for (auto &Operands : getOperands(Values))<br>
+ CombinedOperands.push_back(buildGraph(Operands));<br>
+ }<br>
+<br>
+ unsigned Opcode;<br>
+ switch (ValuesOpcode) {<br>
+ case Instruction::Load:<br>
+ Opcode = VPInstruction::SLPLoad;<br>
+ break;<br>
+ case Instruction::Store:<br>
+ Opcode = VPInstruction::SLPStore;<br>
+ break;<br>
+ default:<br>
+ Opcode = ValuesOpcode;<br>
+ break;<br>
+ }<br>
+<br>
+ if (!CompletelySLP)<br>
+ return markFailed();<br>
+<br>
+ assert(CombinedOperands.size() > 0 && "Need more some operands");<br>
+ auto *VPI = new VPInstruction(Opcode, CombinedOperands);<br>
+ VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());<br>
+<br>
+ LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());<br>
+ cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");<br>
+ addCombined(Values, VPI);<br>
+ return VPI;<br>
+}<br>
<br>
Modified: llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h?rev=346857&r1=346856&r2=346857&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h?rev=346857&r1=346856&r2=346857&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h (original)<br>
+++ llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h Wed Nov 14 05:11:49 2018<br>
@@ -106,6 +106,20 @@ public:<br>
const_user_range users() const {<br>
return const_user_range(user_begin(), user_end());<br>
}<br>
+<br>
+ /// Returns true if the value has more than one unique user.<br>
+ bool hasMoreThanOneUniqueUser() {<br>
+ if (getNumUsers() == 0)<br>
+ return false;<br>
+<br>
+ // Check if all users match the first user.<br>
+ auto Current = std::next(user_begin());<br>
+ while (Current != user_end() && *user_begin() == *Current)<br>
+ Current++;<br>
+ return Current != user_end();<br>
+ }<br>
+<br>
+ void replaceAllUsesWith(VPValue *New);<br>
};<br>
<br>
typedef DenseMap<Value *, VPValue *> Value2VPValueTy;<br>
@@ -151,6 +165,8 @@ public:<br>
return Operands[N];<br>
}<br>
<br>
+ void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }<br>
+<br>
typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;<br>
typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;<br>
typedef iterator_range<operand_iterator> operand_range;<br>
<br>
Modified: llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt (original)<br>
+++ llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt Wed Nov 14 05:11:49 2018<br>
@@ -10,4 +10,5 @@ add_llvm_unittest(VectorizeTests<br>
VPlanLoopInfoTest.cpp<br>
VPlanTest.cpp<br>
VPlanHCFGTest.cpp<br>
+ VPlanSlpTest.cpp<br>
)<br>
<br>
Added: llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp?rev=346857&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp?rev=346857&view=auto</a><br>
==============================================================================<br>
--- llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp (added)<br>
+++ llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp Wed Nov 14 05:11:49 2018<br>
@@ -0,0 +1,899 @@<br>
+//===- llvm/unittest/Transforms/Vectorize/VPlanSlpTest.cpp ---------------===//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===----------------------------------------------------------------------===//<br>
+<br>
+#include "../lib/Transforms/Vectorize/VPlan.h"<br>
+#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"<br>
+#include "../lib/Transforms/Vectorize/VPlanHCFGTransforms.h"<br>
+#include "VPlanTestBase.h"<br>
+#include "llvm/Analysis/VectorUtils.h"<br>
+#include "gtest/gtest.h"<br>
+<br>
+namespace llvm {<br>
+namespace {<br>
+<br>
+class VPlanSlpTest : public VPlanTestBase {<br>
+protected:<br>
+ TargetLibraryInfoImpl TLII;<br>
+ TargetLibraryInfo TLI;<br>
+ DataLayout DL;<br>
+<br>
+ std::unique_ptr<AssumptionCache> AC;<br>
+ std::unique_ptr<ScalarEvolution> SE;<br>
+ std::unique_ptr<AAResults> AARes;<br>
+ std::unique_ptr<BasicAAResult> BasicAA;<br>
+ std::unique_ptr<LoopAccessInfo> LAI;<br>
+ std::unique_ptr<PredicatedScalarEvolution> PSE;<br>
+ std::unique_ptr<InterleavedAccessInfo> IAI;<br>
+<br>
+ VPlanSlpTest()<br>
+ : TLII(), TLI(TLII),<br>
+ DL("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"<br>
+ "f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:"<br>
+ "16:32:64-S128") {}<br>
+<br>
+ VPInterleavedAccessInfo getInterleavedAccessInfo(Function &F, Loop *L,<br>
+ VPlan &Plan) {<br>
+ AC.reset(new AssumptionCache(F));<br>
+ SE.reset(new ScalarEvolution(F, TLI, *AC, *DT, *LI));<br>
+ BasicAA.reset(new BasicAAResult(DL, F, TLI, *AC, &*DT, &*LI));<br>
+ AARes.reset(new AAResults(TLI));<br>
+ AARes->addAAResult(*BasicAA);<br>
+ PSE.reset(new PredicatedScalarEvolution(*SE, *L));<br>
+ LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, &*AARes, &*DT, &*LI));<br>
+ IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));<br>
+ IAI->analyzeInterleaving(false);<br>
+ return {Plan, *IAI};<br>
+ }<br>
+};<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpSimple_2) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);<br>
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());<br>
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());<br>
+<br>
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));<br>
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());<br>
+<br>
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));<br>
+ auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpSimple_3) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr %struct.Test, %struct.Test* %A, i64 "<br>
+ " %indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ " %indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ " %indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ " %indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ " %indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ " %indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));<br>
+<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);<br>
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());<br>
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());<br>
+<br>
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));<br>
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());<br>
+<br>
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));<br>
+ auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());<br>
+<br>
+ VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 1));<br>
+ VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3));<br>
+ EXPECT_EQ(GetA, CombinedLoadA->getOperand(0));<br>
+ EXPECT_EQ(GetB, CombinedLoadB->getOperand(0));<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReuse_1) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vA0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vA1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 8));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);<br>
+ EXPECT_EQ(64u, Slp.getWidestBundleBits());<br>
+ EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());<br>
+<br>
+ auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));<br>
+ EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());<br>
+<br>
+ auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));<br>
+ EXPECT_EQ(CombinedLoadA, CombinedAdd->getOperand(1));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReuse_2) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define i32 @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vA0\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vA1\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret i32 %vA1\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 5));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ Slp.buildGraph(StoreRoot);<br>
+ EXPECT_FALSE(Slp.isCompletelySLP());<br>
+}<br>
+<br>
+static void checkReorderExample(VPInstruction *Store1, VPInstruction *Store2,<br>
+ VPBasicBlock *Body,<br>
+ VPInterleavedAccessInfo &&IAI) {<br>
+ VPlanSlp Slp(IAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);<br>
+<br>
+ EXPECT_TRUE(Slp.isCompletelySLP());<br>
+ EXPECT_EQ(CombinedStore->getOpcode(), VPInstruction::SLPStore);<br>
+<br>
+ VPInstruction *CombinedAdd =<br>
+ cast<VPInstruction>(CombinedStore->getOperand(0));<br>
+ EXPECT_EQ(CombinedAdd->getOpcode(), Instruction::Add);<br>
+<br>
+ VPInstruction *CombinedMulAB =<br>
+ cast<VPInstruction>(CombinedAdd->getOperand(0));<br>
+ VPInstruction *CombinedMulCD =<br>
+ cast<VPInstruction>(CombinedAdd->getOperand(1));<br>
+ EXPECT_EQ(CombinedMulAB->getOpcode(), Instruction::Mul);<br>
+<br>
+ VPInstruction *CombinedLoadA =<br>
+ cast<VPInstruction>(CombinedMulAB->getOperand(0));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());<br>
+ VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 2));<br>
+ VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));<br>
+ EXPECT_EQ(LoadvA0->getOperand(0), CombinedLoadA->getOperand(0));<br>
+ EXPECT_EQ(LoadvA1->getOperand(0), CombinedLoadA->getOperand(1));<br>
+<br>
+ VPInstruction *CombinedLoadB =<br>
+ cast<VPInstruction>(CombinedMulAB->getOperand(1));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());<br>
+ VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 4));<br>
+ VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 14));<br>
+ EXPECT_EQ(LoadvB0->getOperand(0), CombinedLoadB->getOperand(0));<br>
+ EXPECT_EQ(LoadvB1->getOperand(0), CombinedLoadB->getOperand(1));<br>
+<br>
+ EXPECT_EQ(CombinedMulCD->getOpcode(), Instruction::Mul);<br>
+<br>
+ VPInstruction *CombinedLoadC =<br>
+ cast<VPInstruction>(CombinedMulCD->getOperand(0));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadC->getOpcode());<br>
+ VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 7));<br>
+ VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 17));<br>
+ EXPECT_EQ(LoadvC0->getOperand(0), CombinedLoadC->getOperand(0));<br>
+ EXPECT_EQ(LoadvC1->getOperand(0), CombinedLoadC->getOperand(1));<br>
+<br>
+ VPInstruction *CombinedLoadD =<br>
+ cast<VPInstruction>(CombinedMulCD->getOperand(1));<br>
+ EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadD->getOpcode());<br>
+ VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 9));<br>
+ VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19));<br>
+ EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0));<br>
+ EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1));<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReorder_1) {<br>
+ LLVMContext Ctx;<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "<br>
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vC0 = load i32, i32* %C0, align 4\n"<br>
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vD0 = load i32, i32* %D0, align 4\n"<br>
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %mul21 = mul nsw i32 %vA1, %vB1\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vC1 = load i32, i32* %C1, align 4\n"<br>
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vD1 = load i32, i32* %D1, align 4\n"<br>
+ " %mul22 = mul nsw i32 %vC1, %vD1\n"<br>
+ " %add1 = add nsw i32 %mul11, %mul12\n"<br>
+ " %add2 = add nsw i32 %mul22, %mul21\n"<br>
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add1, i32* %E0, align 4\n"<br>
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add2, i32* %E1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x3");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));<br>
+<br>
+ checkReorderExample(<br>
+ Store1, Store2, Body,<br>
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReorder_2) {<br>
+ LLVMContext Ctx;<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "<br>
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vC0 = load i32, i32* %C0, align 4\n"<br>
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vD0 = load i32, i32* %D0, align 4\n"<br>
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %mul21 = mul nsw i32 %vB1, %vA1\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vC1 = load i32, i32* %C1, align 4\n"<br>
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vD1 = load i32, i32* %D1, align 4\n"<br>
+ " %mul22 = mul nsw i32 %vD1, %vC1\n"<br>
+ " %add1 = add nsw i32 %mul11, %mul12\n"<br>
+ " %add2 = add nsw i32 %mul22, %mul21\n"<br>
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add1, i32* %E0, align 4\n"<br>
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add2, i32* %E1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x3");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));<br>
+<br>
+ checkReorderExample(<br>
+ Store1, Store2, Body,<br>
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReorder_3) {<br>
+ LLVMContext Ctx;<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "<br>
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %mul11 = mul nsw i32 %vA1, %vB0\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vC0 = load i32, i32* %C0, align 4\n"<br>
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vD0 = load i32, i32* %D0, align 4\n"<br>
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %mul21 = mul nsw i32 %vB1, %vA0\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vC1 = load i32, i32* %C1, align 4\n"<br>
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vD1 = load i32, i32* %D1, align 4\n"<br>
+ " %mul22 = mul nsw i32 %vD1, %vC1\n"<br>
+ " %add1 = add nsw i32 %mul11, %mul12\n"<br>
+ " %add2 = add nsw i32 %mul22, %mul21\n"<br>
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add1, i32* %E0, align 4\n"<br>
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add2, i32* %E1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x3");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));<br>
+<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));<br>
+<br>
+ // FIXME Need to select better first value for lane0.<br>
+ EXPECT_FALSE(Slp.isCompletelySLP());<br>
+}<br>
+<br>
+TEST_F(VPlanSlpTest, testSlpReorder_4) {<br>
+ LLVMContext Ctx;<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "<br>
+ "%C, %struct.Test* %D, %struct.Test* %E) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %mul11 = mul nsw i32 %vA0, %vB0\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vC0 = load i32, i32* %C0, align 4\n"<br>
+ " %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vD0 = load i32, i32* %D0, align 4\n"<br>
+ " %mul12 = mul nsw i32 %vC0, %vD0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %mul21 = mul nsw i32 %vA1, %vB1\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vC1 = load i32, i32* %C1, align 4\n"<br>
+ " %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vD1 = load i32, i32* %D1, align 4\n"<br>
+ " %mul22 = mul nsw i32 %vC1, %vD1\n"<br>
+ " %add1 = add nsw i32 %mul11, %mul12\n"<br>
+ " %add2 = add nsw i32 %mul22, %mul21\n"<br>
+ " %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add1, i32* %E0, align 4\n"<br>
+ " %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add2, i32* %E1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x3");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));<br>
+<br>
+ checkReorderExample(<br>
+ Store1, Store2, Body,<br>
+ getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));<br>
+}<br>
+<br>
+// Make sure we do not combine instructions with operands in different BBs.<br>
+TEST_F(VPlanSlpTest, testInstrsInDifferentBBs) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " br label %bb2\n"<br>
+ "bb2:\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+ VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 5));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *BB2);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));<br>
+ EXPECT_EQ(0u, Slp.getWidestBundleBits());<br>
+}<br>
+<br>
+// Make sure we do not combine instructions with operands in different BBs.<br>
+TEST_F(VPlanSlpTest, testInstrsInDifferentBBs2) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " br label %bb2\n"<br>
+ "bb2:\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+ VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ // bb2 holds: GEP %C0, store %add0, GEP %C1, store %add1 -- so the two<br>
+ // stores sit at positions 1 and 3 of the block.<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 1));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *BB2);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ // The stores' %add operands are defined in for.body, not in bb2, so no<br>
+ // SLP graph can be built and the widest bundle stays empty.<br>
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));<br>
+ EXPECT_EQ(0u, Slp.getWidestBundleBits());<br>
+}<br>
+<br>
+// Check that an atomic load (%vA0, monotonic) feeding the add bundle<br>
+// prevents an SLP graph from being built.<br>
+TEST_F(VPlanSlpTest, testSlpAtomicLoad) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load atomic i32, i32* %A0 monotonic, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store i32 %add0, i32* %C0, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ // The stores to %C0 and %C1 are at positions 12 and 14 of the loop body.<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ // The atomic %vA0 load stops bundling: no graph, not completely SLP.<br>
+ EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));<br>
+ EXPECT_FALSE(Slp.isCompletelySLP());<br>
+}<br>
+<br>
+// Check that a tree rooted at an atomic store (to %C0, monotonic) is not<br>
+// treated as completely SLP.<br>
+TEST_F(VPlanSlpTest, testSlpAtomicStore) {<br>
+ const char *ModuleString =<br>
+ "%struct.Test = type { i32, i32 }\n"<br>
+ "%struct.Test3 = type { i32, i32, i32 }\n"<br>
+ "%struct.Test4xi8 = type { i8, i8, i8 }\n"<br>
+ "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "<br>
+ "nocapture readonly %B, %struct.Test* nocapture %C) {\n"<br>
+ "entry:\n"<br>
+ " br label %for.body\n"<br>
+ "for.body: ; preds = %for.body, "<br>
+ "%entry\n"<br>
+ " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"<br>
+ " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vA0 = load i32, i32* %A0, align 4\n"<br>
+ " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " %vB0 = load i32, i32* %B0, align 4\n"<br>
+ " %add0 = add nsw i32 %vA0, %vB0\n"<br>
+ " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vA1 = load i32, i32* %A1, align 4\n"<br>
+ " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " %vB1 = load i32, i32* %B1, align 4\n"<br>
+ " %add1 = add nsw i32 %vA1, %vB1\n"<br>
+ " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 0\n"<br>
+ " store atomic i32 %add0, i32* %C0 monotonic, align 4\n"<br>
+ " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "<br>
+ "%indvars.iv, i32 1\n"<br>
+ " store i32 %add1, i32* %C1, align 4\n"<br>
+ " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"<br>
+ " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"<br>
+ " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"<br>
+ "for.cond.cleanup: ; preds = %for.body\n"<br>
+ " ret void\n"<br>
+ "}\n";<br>
+<br>
+ Module &M = parseModule(ModuleString);<br>
+<br>
+ Function *F = M.getFunction("add_x2");<br>
+ BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();<br>
+ auto Plan = buildHCFG(LoopHeader);<br>
+ auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);<br>
+<br>
+ VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();<br>
+ EXPECT_NE(nullptr, Entry->getSingleSuccessor());<br>
+ VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();<br>
+<br>
+ // The stores to %C0 and %C1 are at positions 12 and 14 of the loop body.<br>
+ VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));<br>
+ VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));<br>
+<br>
+ VPlanSlp Slp(VPIAI, *Body);<br>
+ SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};<br>
+ // buildGraph's return value is ignored here; the check is only that the<br>
+ // atomic store root leaves the graph not completely SLP.<br>
+ Slp.buildGraph(StoreRoot);<br>
+ EXPECT_FALSE(Slp.isCompletelySLP());<br>
+}<br>
+<br>
+} // namespace<br>
+} // namespace llvm<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div></div>