[llvm] r346857 - [VPlan, SLP] Add simple SLP analysis on top of VPlan.
Benjamin Kramer via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 14 06:42:36 PST 2018
On Wed, Nov 14, 2018 at 2:14 PM Florian Hahn via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: fhahn
> Date: Wed Nov 14 05:11:49 2018
> New Revision: 346857
>
> URL: http://llvm.org/viewvc/llvm-project?rev=346857&view=rev
> Log:
> [VPlan, SLP] Add simple SLP analysis on top of VPlan.
>
> This patch adds an initial implementation of the look-ahead SLP tree
> construction described in 'Look-Ahead SLP: Auto-vectorization in the
> Presence of Commutative Operations, CGO 2018 by Vasileios Porpodas,
> Rodrigo C. O. Rocha, Luís F. W. Góes'.
>
> It returns an SLP tree represented as VPInstructions, with combined
> instructions represented as a single, wider VPInstruction.
>
> This initial version does not support instructions with multiple
> different users (either inside or outside the SLP tree) or
> non-instruction operands; it won't generate any shuffles or
> insertelement instructions.
>
> It also just adds the analysis that builds an SLP tree rooted in a set
> of stores. It does not include any cost modeling or memory legality
> checks. The plan is to integrate it with VPlan-based cost modeling,
> once available, and to only apply it to operations that can be widened.
>
> A follow-up patch will add support for replacing instructions in a
> VPlan with their SLP counterparts.
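
For readers following along, intended usage mirrors the new unit tests
below: build VPInterleavedAccessInfo for the plan, then seed the
analysis with a bundle of adjacent scalar stores. A minimal sketch,
assuming the setup from VPlanSlpTest.cpp (Body, VPIAI, Store1 and
Store2 as in the tests):

  VPlanSlp Slp(VPIAI, *Body);   // the analysis works on one VPBasicBlock
  SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
  VPInstruction *Combined = Slp.buildGraph(StoreRoot);
  if (Combined && Slp.isCompletelySLP()) {
    // Widest combined bundle in bits, e.g. 64 for two i32 lanes.
    unsigned Bits = Slp.getWidestBundleBits();
    (void)Bits;
  }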
>
> Reviewers: Ayal, mssimpso, rengolin, mkuper, hfinkel, hsaito, dcaballe,
> vporpo, RKSimon, ABataev
>
> Reviewed By: rengolin
>
> Differential Revision: https://reviews.llvm.org/D4949
>
> Added:
> llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp
> llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt
> llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp
> llvm/trunk/lib/Transforms/Vectorize/VPlan.h
> llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h
> llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/CMakeLists.txt Wed Nov 14 05:11:49 2018
> @@ -7,6 +7,7 @@ add_llvm_library(LLVMVectorize
> VPlan.cpp
> VPlanHCFGBuilder.cpp
> VPlanHCFGTransforms.cpp
> + VPlanSLP.cpp
> VPlanVerifier.cpp
>
> ADDITIONAL_HEADER_DIRS
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp?rev=346857&r1=346856&r2=346857&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/VPlan.cpp Wed Nov 14 05:11:49 2018
> @@ -338,6 +338,12 @@ void VPInstruction::print(raw_ostream &O
> case VPInstruction::ICmpULE:
> O << "icmp ule";
> break;
> + case VPInstruction::SLPLoad:
> + O << "combined load";
> + break;
> + case VPInstruction::SLPStore:
> + O << "combined store";
> + break;
> default:
> O << Instruction::getOpcodeName(getOpcode());
> }
> @@ -681,6 +687,13 @@ void VPWidenMemoryInstructionRecipe::pri
>
> template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
>
> +void VPValue::replaceAllUsesWith(VPValue *New) {
> + for (VPUser *User : users())
> + for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
> + if (User->getOperand(I) == this)
> + User->setOperand(I, New);
> +}
> +
> void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
> Old2NewTy &Old2New,
> InterleavedAccessInfo &IAI) {
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/VPlan.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlan.h?rev=346857&r1=346856&r2=346857&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VPlan.h (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/VPlan.h Wed Nov 14 05:11:49 2018
> @@ -60,6 +60,7 @@ class Value;
> class VPBasicBlock;
> class VPRegionBlock;
> class VPlan;
> +class VPlanSlp;
>
> /// A range of powers-of-2 vectorization factors with fixed start and
> /// adjustable end. The range includes start and excludes end, e.g.,:
> @@ -609,10 +610,16 @@ public:
> /// the VPInstruction is also a single def-use vertex.
> class VPInstruction : public VPUser, public VPRecipeBase {
> friend class VPlanHCFGTransforms;
> + friend class VPlanSlp;
>
> public:
> /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
> - enum { Not = Instruction::OtherOpsEnd + 1, ICmpULE };
> + enum {
> + Not = Instruction::OtherOpsEnd + 1,
> + ICmpULE,
> + SLPLoad,
> + SLPStore,
> + };
>
> private:
> typedef unsigned char OpcodeTy;
> @@ -622,6 +629,13 @@ private:
> /// modeled instruction.
> void generateInstruction(VPTransformState &State, unsigned Part);
>
> +protected:
> + Instruction *getUnderlyingInstr() {
> + return cast_or_null<Instruction>(getUnderlyingValue());
> + }
> +
> + void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
> +
> public:
> VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands)
> : VPUser(VPValue::VPInstructionSC, Operands),
> @@ -635,6 +649,11 @@ public:
> return V->getVPValueID() == VPValue::VPInstructionSC;
> }
>
> + VPInstruction *clone() const {
> + SmallVector<VPValue *, 2> Operands(operands());
> + return new VPInstruction(Opcode, Operands);
> + }
> +
> /// Method to support type inquiry through isa, cast, and dyn_cast.
> static inline bool classof(const VPRecipeBase *R) {
> return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC;
> @@ -652,6 +671,14 @@ public:
>
> /// Print the VPInstruction.
> void print(raw_ostream &O) const;
> +
> + /// Return true if this instruction may modify memory.
> + bool mayWriteToMemory() const {
> +    // TODO: we can use attributes of the called function to rule out memory
> +    // modifications.
> + return Opcode == Instruction::Store || Opcode == Instruction::Call ||
> + Opcode == Instruction::Invoke || Opcode == SLPStore;
> + }
> };
>
> /// VPWidenRecipe is a recipe for producing a copy of vector type for each
> @@ -1508,6 +1535,102 @@ public:
> }
> };
>
> +/// Class that maps (parts of) an existing VPlan to trees of combined
> +/// VPInstructions.
> +class VPlanSlp {
> +private:
> + enum class OpMode { Failed, Load, Opcode };
> +
> + /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
> + /// DenseMap keys.
> + struct BundleDenseMapInfo {
> + static SmallVector<VPValue *, 4> getEmptyKey() {
> + return {reinterpret_cast<VPValue *>(-1)};
> + }
> +
> + static SmallVector<VPValue *, 4> getTombstoneKey() {
> + return {reinterpret_cast<VPValue *>(-2)};
> + }
> +
> + static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
> +      return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
> + }
> +
> + static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
> + const SmallVector<VPValue *, 4> &RHS) {
> + return LHS == RHS;
> + }
> + };
> +
> + /// Mapping of values in the original VPlan to a combined VPInstruction.
> + DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
> + BundleToCombined;
> +
> + VPInterleavedAccessInfo &IAI;
> +
> +  /// Basic block to operate on. For now, only instructions in a single BB are
> +  /// considered.
> + const VPBasicBlock &BB;
> +
> +  /// Indicates whether we managed to combine all visited instructions or not.
> + bool CompletelySLP = true;
> +
> + /// Width of the widest combined bundle in bits.
> + unsigned WidestBundleBits = 0;
> +
> + using MultiNodeOpTy =
> + typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
> +
> +  // Input operand bundles for the current multi node. Each multi node operand
> +  // bundle contains values not matching the multi node's opcode. They will
> +  // be reordered in reorderMultiNodeOps, once we have finished building a
> +  // multi node.
> + SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
> +
> + /// Indicates whether we are building a multi node currently.
> + bool MultiNodeActive = false;
> +
> + /// Check if we can vectorize Operands together.
> + bool areVectorizable(ArrayRef<VPValue *> Operands) const;
> +
> + /// Add combined instruction \p New for the bundle \p Operands.
> + void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
> +
> +  /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
> + VPInstruction *markFailed();
> +
> +  /// Reorder operands in the multi node to maximize sequential memory access
> +  /// and commutative operations.
> + SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
> +
> +  /// Choose the best candidate to use for the lane after \p Last. The set of
> +  /// candidates to choose from are values with an opcode matching \p Last's
> +  /// or loads consecutive to \p Last.
> +  std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
> +                                       SmallVectorImpl<VPValue *> &Candidates,
> +                                       VPInterleavedAccessInfo &IAI);
> +
> + /// Print bundle \p Values to dbgs().
> + void dumpBundle(ArrayRef<VPValue *> Values);
> +
> +public:
> +  VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
> +
> + ~VPlanSlp() {
> + for (auto &KV : BundleToCombined)
> + delete KV.second;
> + }
> +
> + /// Tries to build an SLP tree rooted at \p Operands and returns a
> + /// VPInstruction combining \p Operands, if they can be combined.
> + VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
> +
> + /// Return the width of the widest combined bundle in bits.
> + unsigned getWidestBundleBits() const { return WidestBundleBits; }
> +
> +  /// Return true if all visited instructions can be combined.
> + bool isCompletelySLP() const { return CompletelySLP; }
> +};
> } // end namespace llvm
>
> #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
>
> Added: llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp?rev=346857&view=auto
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp (added)
> +++ llvm/trunk/lib/Transforms/Vectorize/VPlanSLP.cpp Wed Nov 14 05:11:49 2018
> @@ -0,0 +1,469 @@
> +//===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +/// This file implements SLP analysis based on VPlan. The analysis is based on
> +/// the ideas described in
> +/// the ideas described in
> +///
> +/// Look-ahead SLP: auto-vectorization in the presence of commutative
> +/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
> +/// Luís F. W. Góes
> +///
> +//===----------------------------------------------------------------------===//
> +
> +#include "VPlan.h"
> +#include "llvm/ADT/DepthFirstIterator.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/ADT/SmallVector.h"
> +#include "llvm/ADT/Twine.h"
> +#include "llvm/Analysis/LoopInfo.h"
> +#include "llvm/Analysis/VectorUtils.h"
> +#include "llvm/IR/BasicBlock.h"
> +#include "llvm/IR/CFG.h"
> +#include "llvm/IR/Dominators.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instruction.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/Type.h"
> +#include "llvm/IR/Value.h"
> +#include "llvm/Support/Casting.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/GraphWriter.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/Transforms/Utils/BasicBlockUtils.h"
> +#include <cassert>
> +#include <iterator>
> +#include <string>
> +#include <vector>
> +
> +using namespace llvm;
> +
> +#define DEBUG_TYPE "vplan-slp"
> +
> +// Number of levels to look ahead when re-ordering multi node operands.
> +static unsigned LookaheadMaxDepth = 5;
> +
> +VPInstruction *VPlanSlp::markFailed() {
> + // FIXME: Currently this is used to signal we hit instructions we cannot
> + // trivially SLP'ize.
> + CompletelySLP = false;
> + return nullptr;
> +}
> +
> +void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
> + if (all_of(Operands, [](VPValue *V) {
> + return cast<VPInstruction>(V)->getUnderlyingInstr();
> + })) {
> + unsigned BundleSize = 0;
> + for (VPValue *V : Operands) {
> + Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
> + assert(!T->isVectorTy() && "Only scalar types supported for now");
> + BundleSize += T->getScalarSizeInBits();
> + }
> + WidestBundleBits = std::max(WidestBundleBits, BundleSize);
> + }
> +
> + auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
> + assert(Res.second &&
> + "Already created a combined instruction for the operand bundle");
> + (void)Res;
> +}
> +
> +bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
> + // Currently we only support VPInstructions.
> + if (!all_of(Operands, [](VPValue *Op) {
> + return Op && isa<VPInstruction>(Op) &&
> + cast<VPInstruction>(Op)->getUnderlyingInstr();
> + })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
> + return false;
> + }
> +
> +  // Check if opcodes and type width agree for all instructions in the bundle.
> +  // FIXME: Differing widths/opcodes can be handled by inserting additional
> +  // instructions.
> + // FIXME: Deal with non-primitive types.
> + const Instruction *OriginalInstr =
> + cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
> + unsigned Opcode = OriginalInstr->getOpcode();
> + unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
> + if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
> +        const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
> + return I->getOpcode() == Opcode &&
> + I->getType()->getPrimitiveSizeInBits() == Width;
> + })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
> + return false;
> + }
> +
> + // For now, all operands must be defined in the same BB.
> + if (any_of(Operands, [this](VPValue *Op) {
> + return cast<VPInstruction>(Op)->getParent() != &this->BB;
> + })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
> + return false;
> + }
> +
> + if (any_of(Operands,
> +             [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
> + return false;
> + }
> +
> + // For loads, check that there are no instructions writing to memory in
> + // between them.
> +  // TODO: we only have to forbid instructions writing to memory that could
> + // interfere with any of the loads in the bundle
> + if (Opcode == Instruction::Load) {
> + unsigned LoadsSeen = 0;
> + VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
> + for (auto &I : *Parent) {
> + auto *VPI = cast<VPInstruction>(&I);
> + if (VPI->getOpcode() == Instruction::Load &&
> +          std::find(Operands.begin(), Operands.end(), VPI) != Operands.end())
> + LoadsSeen++;
> +
> + if (LoadsSeen == Operands.size())
> + break;
> + if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
> + LLVM_DEBUG(
> + dbgs() << "VPSLP: instruction modifying memory between
> loads\n");
> + return false;
> + }
> + }
> +
> + if (!all_of(Operands, [](VPValue *Op) {
> +          return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
> + ->isSimple();
> + })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
> + return false;
> + }
> + }
> +
> + if (Opcode == Instruction::Store)
> + if (!all_of(Operands, [](VPValue *Op) {
> +          return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
> + ->isSimple();
> + })) {
> + LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
> + unsigned OperandIndex) {
> + SmallVector<VPValue *, 4> Operands;
> + for (VPValue *V : Values) {
> + auto *U = cast<VPUser>(V);
> + Operands.push_back(U->getOperand(OperandIndex));
> + }
> + return Operands;
> +}
> +
> +static bool areCommutative(ArrayRef<VPValue *> Values) {
> + return Instruction::isCommutative(
> + cast<VPInstruction>(Values[0])->getOpcode());
> +}
> +
> +static SmallVector<SmallVector<VPValue *, 4>, 4>
> +getOperands(ArrayRef<VPValue *> Values) {
> + SmallVector<SmallVector<VPValue *, 4>, 4> Result;
> + auto *VPI = cast<VPInstruction>(Values[0]);
> +
> + switch (VPI->getOpcode()) {
> + case Instruction::Load:
> + llvm_unreachable("Loads terminate a tree, no need to get operands");
> + case Instruction::Store:
> + Result.push_back(getOperands(Values, 0));
> + break;
> + default:
> + for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
> + Result.push_back(getOperands(Values, I));
> + break;
> + }
> +
> + return Result;
> +}
> +
> +/// Returns the opcode of Values or None if they do not all agree.
> +static Optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
> + unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
> + if (any_of(Values, [Opcode](VPValue *V) {
> + return cast<VPInstruction>(V)->getOpcode() != Opcode;
> + }))
> + return None;
> + return {Opcode};
> +}
> +
> +/// Returns true if A and B access sequential memory if they are loads or
> +/// stores or if they have identical opcodes otherwise.
> +static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
> + VPInterleavedAccessInfo &IAI) {
> + if (A->getOpcode() != B->getOpcode())
> + return false;
> +
> + if (A->getOpcode() != Instruction::Load &&
> + A->getOpcode() != Instruction::Store)
> + return true;
> + auto *GA = IAI.getInterleaveGroup(A);
> + auto *GB = IAI.getInterleaveGroup(B);
> +
> + return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
> +}
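
If I read areConsecutiveOrMatch right, "consecutive" is decided purely
via the interleave groups: two loads only match if they are in the same
group and B's index is exactly one past A's, so the check is ordered. A
tiny example in terms of the first unit test below (my reading, not
part of the patch):

  // %vA0 = load i32, i32* %A0   ; index 0 in its interleave group
  // %vA1 = load i32, i32* %A1   ; index 1 in the same group
  // areConsecutiveOrMatch(vA0, vA1, IAI) -> true
  // areConsecutiveOrMatch(vA1, vA0, IAI) -> false (order matters)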
> +
> +/// Implements getLAScore from Listing 7 in the paper.
> +/// Traverses and compares operands of V1 and V2 to MaxLevel.
> +static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
> + VPInterleavedAccessInfo &IAI) {
> + if (!isa<VPInstruction>(V1) || !isa<VPInstruction>(V2))
> + return 0;
> +
> + if (MaxLevel == 0)
> + return (unsigned)areConsecutiveOrMatch(cast<VPInstruction>(V1),
> + cast<VPInstruction>(V2), IAI);
> +
> + unsigned Score = 0;
> +  for (unsigned I = 0, EV1 = cast<VPUser>(V1)->getNumOperands(); I < EV1; ++I)
> +    for (unsigned J = 0, EV2 = cast<VPUser>(V2)->getNumOperands(); J < EV2; ++J)
> +      Score += getLAScore(cast<VPUser>(V1)->getOperand(I),
> +                          cast<VPUser>(V2)->getOperand(J), MaxLevel - 1, IAI);
> + return Score;
> +}
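
For MaxLevel == 1 this boils down to summing areConsecutiveOrMatch over
all operand pairs. A worked example with the operand names from
testSlpSimple_2 below (my arithmetic, not part of the patch):

  // add0 = add vA0, vB0      add1 = add vA1, vB1
  // (vA0, vA1) consecutive -> 1     (vA0, vB1) -> 0
  // (vB0, vA1) -> 0                 (vB0, vB1) consecutive -> 1
  // getLAScore(add0, add1, /*MaxLevel=*/1, IAI) == 2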
> +
> +std::pair<VPlanSlp::OpMode, VPValue *>
> +VPlanSlp::getBest(OpMode Mode, VPValue *Last,
> + SmallVectorImpl<VPValue *> &Candidates,
> + VPInterleavedAccessInfo &IAI) {
> + LLVM_DEBUG(dbgs() << " getBest\n");
> + VPValue *Best = Candidates[0];
> + SmallVector<VPValue *, 4> BestCandidates;
> +
> +  LLVM_DEBUG(dbgs() << "    Candidates for "
> +                    << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
> + for (auto *Candidate : Candidates) {
> + auto *LastI = cast<VPInstruction>(Last);
> + auto *CandidateI = cast<VPInstruction>(Candidate);
> + if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
> +      LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
> + << " ");
> + BestCandidates.push_back(Candidate);
> + }
> + }
> + LLVM_DEBUG(dbgs() << "\n");
> +
> + if (BestCandidates.empty())
> + return {OpMode::Failed, nullptr};
> +
> + if (BestCandidates.size() == 1)
> + return {Mode, BestCandidates[0]};
> +
> + if (Mode == OpMode::Opcode) {
> + unsigned BestScore = 0;
> + for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
> + unsigned PrevScore = ~0u;
> + bool AllSame = true;
> +
> + // FIXME: Avoid visiting the same operands multiple times.
> + for (auto *Candidate : BestCandidates) {
> + unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
> + if (PrevScore == ~0u)
> + PrevScore = Score;
> + if (PrevScore != Score)
> + AllSame = false;
> + PrevScore = Score;
> +
> + if (Score > BestScore) {
> + BestScore = Score;
> + Best = Candidate;
> + }
> + }
> + if (!AllSame)
> + break;
> + }
> + }
> + LLVM_DEBUG(dbgs() << "Found best "
> + << *cast<VPInstruction>(Best)->getUnderlyingInstr()
> + << "\n");
> + std::remove(Candidates.begin(), Candidates.end(), Best);
>
What's this supposed to do? Right now it moves "Best" to the end of the
vector if it's there. That doesn't look like it's intentional.
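
Presumably the intent was the erase-remove idiom, which actually shrinks
the vector (an untested sketch):

  Candidates.erase(std::remove(Candidates.begin(), Candidates.end(), Best),
                   Candidates.end());

As written, the iterator returned by std::remove is discarded, so the
vector keeps its size and the trailing elements are left in a valid but
unspecified state.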
> +
> + return {Mode, Best};
> +}
> +
> +SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
> + SmallVector<MultiNodeOpTy, 4> FinalOrder;
> + SmallVector<OpMode, 4> Mode;
> + FinalOrder.reserve(MultiNodeOps.size());
> + Mode.reserve(MultiNodeOps.size());
> +
> + LLVM_DEBUG(dbgs() << "Reordering multinode\n");
> +
> + for (auto &Operands : MultiNodeOps) {
> + FinalOrder.push_back({Operands.first, {Operands.second[0]}});
> + if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
> + Instruction::Load)
> + Mode.push_back(OpMode::Load);
> + else
> + Mode.push_back(OpMode::Opcode);
> + }
> +
> +  for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
> +    LLVM_DEBUG(dbgs() << "  Finding best value for lane " << Lane << "\n");
> + SmallVector<VPValue *, 4> Candidates;
> + Candidates.reserve(MultiNodeOps.size());
> + LLVM_DEBUG(dbgs() << " Candidates ");
> + for (auto Ops : MultiNodeOps) {
> + LLVM_DEBUG(
> +          dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
> + << " ");
> + Candidates.push_back(Ops.second[Lane]);
> + }
> + LLVM_DEBUG(dbgs() << "\n");
> +
> + for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
> + LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
> + if (Mode[Op] == OpMode::Failed)
> + continue;
> +
> + VPValue *Last = FinalOrder[Op].second[Lane - 1];
> + std::pair<OpMode, VPValue *> Res =
> + getBest(Mode[Op], Last, Candidates, IAI);
> + if (Res.second)
> + FinalOrder[Op].second.push_back(Res.second);
> + else
> + // TODO: handle this case
> + FinalOrder[Op].second.push_back(markFailed());
> + }
> + }
> +
> + return FinalOrder;
> +}
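
To make the reordering concrete, here is how I understand it playing out
in testSlpReorder_2 below, where lane 1 has the multiply operands
swapped (mul21 = mul vB1, vA1):

  // Multi node operand bundles before reordering:
  //   bundle 0: lane 0 = vA0, lane 1 = vB1
  //   bundle 1: lane 0 = vB0, lane 1 = vA1
  // getBest picks the load consecutive to each lane-0 value, giving:
  //   bundle 0: (vA0, vA1)    bundle 1: (vB0, vB1)
  // so both multiplies can still be combined.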
> +
> +void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
> + LLVM_DEBUG(dbgs() << " Ops: ");
> + for (auto Op : Values)
> +    if (auto *Instr = cast_or_null<VPInstruction>(Op)->getUnderlyingInstr())
> + LLVM_DEBUG(dbgs() << *Instr << " | ");
> + else
> + LLVM_DEBUG(dbgs() << " nullptr | ");
> + LLVM_DEBUG(dbgs() << "\n");
> +}
> +
> +VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
> + assert(!Values.empty() && "Need some operands!");
> +
> +  // If we already visited this instruction bundle, re-use the existing node
> + auto I = BundleToCombined.find(to_vector<4>(Values));
> + if (I != BundleToCombined.end()) {
> +#ifndef NDEBUG
> +    // Check that the resulting graph is a tree. If we re-use a node, this
> +    // means its values have multiple users. We only allow this, if all users
> +    // of each value are the same instruction.
> +    for (auto *V : Values) {
> +      auto UI = V->user_begin();
> +      auto *FirstUser = *UI++;
> +      while (UI != V->user_end()) {
> +        assert(*UI == FirstUser && "Currently we only support SLP trees.");
> +        UI++;
> +      }
> +    }
> +#endif
> + return I->second;
> + }
> +
> + // Dump inputs
> + LLVM_DEBUG({
> + dbgs() << "buildGraph: ";
> + dumpBundle(Values);
> + });
> +
> + if (!areVectorizable(Values))
> + return markFailed();
> +
> + assert(getOpcode(Values) && "Opcodes for all values must match");
> + unsigned ValuesOpcode = getOpcode(Values).getValue();
> +
> + SmallVector<VPValue *, 4> CombinedOperands;
> + if (areCommutative(Values)) {
> + bool MultiNodeRoot = !MultiNodeActive;
> + MultiNodeActive = true;
> + for (auto &Operands : getOperands(Values)) {
> + LLVM_DEBUG({
> + dbgs() << " Visiting Commutative";
> + dumpBundle(Operands);
> + });
> +
> + auto OperandsOpcode = getOpcode(Operands);
> + if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
> + LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
> + CombinedOperands.push_back(buildGraph(Operands));
> + } else {
> + LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
> +        // Create a dummy VPInstruction, which we will replace later with the
> +        // re-ordered operand.
> + VPInstruction *Op = new VPInstruction(0, {});
> + CombinedOperands.push_back(Op);
> + MultiNodeOps.emplace_back(Op, Operands);
> + }
> + }
> +
> + if (MultiNodeRoot) {
> + LLVM_DEBUG(dbgs() << "Reorder \n");
> + MultiNodeActive = false;
> +
> + auto FinalOrder = reorderMultiNodeOps();
> +
> + MultiNodeOps.clear();
> + for (auto &Ops : FinalOrder) {
> + VPInstruction *NewOp = buildGraph(Ops.second);
> + Ops.first->replaceAllUsesWith(NewOp);
> + for (unsigned i = 0; i < CombinedOperands.size(); i++)
> + if (CombinedOperands[i] == Ops.first)
> + CombinedOperands[i] = NewOp;
> + delete Ops.first;
> + Ops.first = NewOp;
> + }
> + LLVM_DEBUG(dbgs() << "Found final order\n");
> + }
> + } else {
> + LLVM_DEBUG(dbgs() << " NonCommuntative\n");
> + if (ValuesOpcode == Instruction::Load)
> + for (VPValue *V : Values)
> + CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
> + else
> + for (auto &Operands : getOperands(Values))
> + CombinedOperands.push_back(buildGraph(Operands));
> + }
> +
> + unsigned Opcode;
> + switch (ValuesOpcode) {
> + case Instruction::Load:
> + Opcode = VPInstruction::SLPLoad;
> + break;
> + case Instruction::Store:
> + Opcode = VPInstruction::SLPStore;
> + break;
> + default:
> + Opcode = ValuesOpcode;
> + break;
> + }
> +
> + if (!CompletelySLP)
> + return markFailed();
> +
> +  assert(CombinedOperands.size() > 0 && "Need some operands");
> + auto *VPI = new VPInstruction(Opcode, CombinedOperands);
> +  VPI->setUnderlyingInstr(cast<VPInstruction>(Values[0])->getUnderlyingInstr());
> +
> + LLVM_DEBUG(dbgs() << "Create VPInstruction "; VPI->print(dbgs());
> +             cast<VPInstruction>(Values[0])->print(dbgs()); dbgs() << "\n");
> + addCombined(Values, VPI);
> + return VPI;
> +}
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h?rev=346857&r1=346856&r2=346857&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/VPlanValue.h Wed Nov 14 05:11:49 2018
> @@ -106,6 +106,20 @@ public:
> const_user_range users() const {
> return const_user_range(user_begin(), user_end());
> }
> +
> + /// Returns true if the value has more than one unique user.
> + bool hasMoreThanOneUniqueUser() {
> + if (getNumUsers() == 0)
> + return false;
> +
> + // Check if all users match the first user.
> + auto Current = std::next(user_begin());
> + while (Current != user_end() && *user_begin() == *Current)
> + Current++;
> + return Current != user_end();
> + }
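
Note the "unique" here: a value whose uses all come from one and the
same instruction still counts as a single user, e.g. %add0 = add nsw
i32 %vA0, %vA0 uses %vA0 twice but is one unique user. That is what
lets testSlpReuse_1 below vectorize.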
> +
> + void replaceAllUsesWith(VPValue *New);
> };
>
> typedef DenseMap<Value *, VPValue *> Value2VPValueTy;
> @@ -151,6 +165,8 @@ public:
> return Operands[N];
> }
>
> + void setOperand(unsigned I, VPValue *New) { Operands[I] = New; }
> +
> typedef SmallVectorImpl<VPValue *>::iterator operand_iterator;
> +  typedef SmallVectorImpl<VPValue *>::const_iterator const_operand_iterator;
> typedef iterator_range<operand_iterator> operand_range;
>
> Modified: llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt?rev=346857&r1=346856&r2=346857&view=diff
> ==============================================================================
> --- llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt (original)
> +++ llvm/trunk/unittests/Transforms/Vectorize/CMakeLists.txt Wed Nov 14 05:11:49 2018
> @@ -10,4 +10,5 @@ add_llvm_unittest(VectorizeTests
> VPlanLoopInfoTest.cpp
> VPlanTest.cpp
> VPlanHCFGTest.cpp
> + VPlanSlpTest.cpp
> )
>
> Added: llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp?rev=346857&view=auto
> ==============================================================================
> --- llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp (added)
> +++ llvm/trunk/unittests/Transforms/Vectorize/VPlanSlpTest.cpp Wed Nov 14 05:11:49 2018
> @@ -0,0 +1,899 @@
> +//===- llvm/unittest/Transforms/Vectorize/VPlanSlpTest.cpp ---------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "../lib/Transforms/Vectorize/VPlan.h"
> +#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
> +#include "../lib/Transforms/Vectorize/VPlanHCFGTransforms.h"
> +#include "VPlanTestBase.h"
> +#include "llvm/Analysis/VectorUtils.h"
> +#include "gtest/gtest.h"
> +
> +namespace llvm {
> +namespace {
> +
> +class VPlanSlpTest : public VPlanTestBase {
> +protected:
> + TargetLibraryInfoImpl TLII;
> + TargetLibraryInfo TLI;
> + DataLayout DL;
> +
> + std::unique_ptr<AssumptionCache> AC;
> + std::unique_ptr<ScalarEvolution> SE;
> + std::unique_ptr<AAResults> AARes;
> + std::unique_ptr<BasicAAResult> BasicAA;
> + std::unique_ptr<LoopAccessInfo> LAI;
> + std::unique_ptr<PredicatedScalarEvolution> PSE;
> + std::unique_ptr<InterleavedAccessInfo> IAI;
> +
> +  VPlanSlpTest()
> +      : TLII(), TLI(TLII),
> +        DL("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-"
> +           "f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:"
> +           "16:32:64-S128") {}
> +
> + VPInterleavedAccessInfo getInterleavedAccessInfo(Function &F, Loop *L,
> + VPlan &Plan) {
> + AC.reset(new AssumptionCache(F));
> + SE.reset(new ScalarEvolution(F, TLI, *AC, *DT, *LI));
> + BasicAA.reset(new BasicAAResult(DL, F, TLI, *AC, &*DT, &*LI));
> + AARes.reset(new AAResults(TLI));
> + AARes->addAAResult(*BasicAA);
> + PSE.reset(new PredicatedScalarEvolution(*SE, *L));
> + LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, &*AARes, &*DT, &*LI));
> + IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI));
> + IAI->analyzeInterleaving(false);
> + return {Plan, *IAI};
> + }
> +};
> +
> +TEST_F(VPlanSlpTest, testSlpSimple_2) {
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "%struct.Test3 = type { i32, i32, i32 }\n"
> +      "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> +      "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
> +      "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %add0 = add nsw i32 %vA0, %vB0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %add1 = add nsw i32 %vA1, %vB1\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add0, i32* %C0, align 4\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add1, i32* %C1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +  auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
> + EXPECT_EQ(64u, Slp.getWidestBundleBits());
> + EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
> +
> + auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
> + EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
> +
> + auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
> + auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpSimple_3) {
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "%struct.Test3 = type { i32, i32, i32 }\n"
> +      "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> +      "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
> +      "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr %struct.Test, %struct.Test* %A, i64 "
> +      " %indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      " %indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %add0 = add nsw i32 %vA0, %vB0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      " %indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      " %indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %add1 = add nsw i32 %vA1, %vB1\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      " %indvars.iv, i32 0\n"
> +      "  store i32 %add0, i32* %C0, align 4\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      " %indvars.iv, i32 1\n"
> +      "  store i32 %add1, i32* %C1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
> +
> +  auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
> + EXPECT_EQ(64u, Slp.getWidestBundleBits());
> + EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
> +
> + auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
> + EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
> +
> + auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
> + auto *CombinedLoadB = cast<VPInstruction>(CombinedAdd->getOperand(1));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
> +
> +  VPInstruction *GetA = cast<VPInstruction>(&*std::next(Body->begin(), 1));
> +  VPInstruction *GetB = cast<VPInstruction>(&*std::next(Body->begin(), 3));
> + EXPECT_EQ(GetA, CombinedLoadA->getOperand(0));
> + EXPECT_EQ(GetB, CombinedLoadB->getOperand(0));
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReuse_1) {
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define void @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
> +      "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %add0 = add nsw i32 %vA0, %vA0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %add1 = add nsw i32 %vA1, %vA1\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add0, i32* %C0, align 4\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add1, i32* %C1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +  auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 8));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
> + EXPECT_EQ(64u, Slp.getWidestBundleBits());
> + EXPECT_EQ(VPInstruction::SLPStore, CombinedStore->getOpcode());
> +
> + auto *CombinedAdd = cast<VPInstruction>(CombinedStore->getOperand(0));
> + EXPECT_EQ(Instruction::Add, CombinedAdd->getOpcode());
> +
> + auto *CombinedLoadA = cast<VPInstruction>(CombinedAdd->getOperand(0));
> + EXPECT_EQ(CombinedLoadA, CombinedAdd->getOperand(1));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReuse_2) {
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define i32 @add_x2(%struct.Test* nocapture readonly %A, %struct.Test* "
> +      "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %add0 = add nsw i32 %vA0, %vA0\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add0, i32* %C0, align 4\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %add1 = add nsw i32 %vA1, %vA1\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add1, i32* %C1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret i32 %vA1\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +  auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 5));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 10));
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + Slp.buildGraph(StoreRoot);
> + EXPECT_FALSE(Slp.isCompletelySLP());
> +}
> +
> +static void checkReorderExample(VPInstruction *Store1, VPInstruction *Store2,
> + VPBasicBlock *Body,
> + VPInterleavedAccessInfo &&IAI) {
> + VPlanSlp Slp(IAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + VPInstruction *CombinedStore = Slp.buildGraph(StoreRoot);
> +
> + EXPECT_TRUE(Slp.isCompletelySLP());
> + EXPECT_EQ(CombinedStore->getOpcode(), VPInstruction::SLPStore);
> +
> + VPInstruction *CombinedAdd =
> + cast<VPInstruction>(CombinedStore->getOperand(0));
> + EXPECT_EQ(CombinedAdd->getOpcode(), Instruction::Add);
> +
> + VPInstruction *CombinedMulAB =
> + cast<VPInstruction>(CombinedAdd->getOperand(0));
> + VPInstruction *CombinedMulCD =
> + cast<VPInstruction>(CombinedAdd->getOperand(1));
> + EXPECT_EQ(CombinedMulAB->getOpcode(), Instruction::Mul);
> +
> + VPInstruction *CombinedLoadA =
> + cast<VPInstruction>(CombinedMulAB->getOperand(0));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadA->getOpcode());
> +  VPInstruction *LoadvA0 = cast<VPInstruction>(&*std::next(Body->begin(), 2));
> +  VPInstruction *LoadvA1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
> + EXPECT_EQ(LoadvA0->getOperand(0), CombinedLoadA->getOperand(0));
> + EXPECT_EQ(LoadvA1->getOperand(0), CombinedLoadA->getOperand(1));
> +
> + VPInstruction *CombinedLoadB =
> + cast<VPInstruction>(CombinedMulAB->getOperand(1));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadB->getOpcode());
> +  VPInstruction *LoadvB0 = cast<VPInstruction>(&*std::next(Body->begin(), 4));
> +  VPInstruction *LoadvB1 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
> + EXPECT_EQ(LoadvB0->getOperand(0), CombinedLoadB->getOperand(0));
> + EXPECT_EQ(LoadvB1->getOperand(0), CombinedLoadB->getOperand(1));
> +
> + EXPECT_EQ(CombinedMulCD->getOpcode(), Instruction::Mul);
> +
> + VPInstruction *CombinedLoadC =
> + cast<VPInstruction>(CombinedMulCD->getOperand(0));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadC->getOpcode());
> +  VPInstruction *LoadvC0 = cast<VPInstruction>(&*std::next(Body->begin(), 7));
> +  VPInstruction *LoadvC1 = cast<VPInstruction>(&*std::next(Body->begin(), 17));
> + EXPECT_EQ(LoadvC0->getOperand(0), CombinedLoadC->getOperand(0));
> + EXPECT_EQ(LoadvC1->getOperand(0), CombinedLoadC->getOperand(1));
> +
> + VPInstruction *CombinedLoadD =
> + cast<VPInstruction>(CombinedMulCD->getOperand(1));
> + EXPECT_EQ(VPInstruction::SLPLoad, CombinedLoadD->getOpcode());
> +  VPInstruction *LoadvD0 = cast<VPInstruction>(&*std::next(Body->begin(), 9));
> +  VPInstruction *LoadvD1 = cast<VPInstruction>(&*std::next(Body->begin(), 19));
> + EXPECT_EQ(LoadvD0->getOperand(0), CombinedLoadD->getOperand(0));
> + EXPECT_EQ(LoadvD1->getOperand(0), CombinedLoadD->getOperand(1));
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReorder_1) {
> + LLVMContext Ctx;
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
> +      "%C, %struct.Test* %D, %struct.Test* %E) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %mul11 = mul nsw i32 %vA0, %vB0\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vC0 = load i32, i32* %C0, align 4\n"
> +      "  %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vD0 = load i32, i32* %D0, align 4\n"
> +      "  %mul12 = mul nsw i32 %vC0, %vD0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %mul21 = mul nsw i32 %vA1, %vB1\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vC1 = load i32, i32* %C1, align 4\n"
> +      "  %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vD1 = load i32, i32* %D1, align 4\n"
> +      "  %mul22 = mul nsw i32 %vC1, %vD1\n"
> +      "  %add1 = add nsw i32 %mul11, %mul12\n"
> +      "  %add2 = add nsw i32 %mul22, %mul21\n"
> +      "  %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add1, i32* %E0, align 4\n"
> +      "  %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add2, i32* %E1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x3");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
> +
> + checkReorderExample(
> + Store1, Store2, Body,
> + getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReorder_2) {
> + LLVMContext Ctx;
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
> +      "%C, %struct.Test* %D, %struct.Test* %E) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %mul11 = mul nsw i32 %vA0, %vB0\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vC0 = load i32, i32* %C0, align 4\n"
> +      "  %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vD0 = load i32, i32* %D0, align 4\n"
> +      "  %mul12 = mul nsw i32 %vC0, %vD0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %mul21 = mul nsw i32 %vB1, %vA1\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vC1 = load i32, i32* %C1, align 4\n"
> +      "  %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vD1 = load i32, i32* %D1, align 4\n"
> +      "  %mul22 = mul nsw i32 %vD1, %vC1\n"
> +      "  %add1 = add nsw i32 %mul11, %mul12\n"
> +      "  %add2 = add nsw i32 %mul22, %mul21\n"
> +      "  %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add1, i32* %E0, align 4\n"
> +      "  %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add2, i32* %E1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x3");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
> +
> + checkReorderExample(
> + Store1, Store2, Body,
> + getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReorder_3) {
> + LLVMContext Ctx;
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
> +      "%C, %struct.Test* %D, %struct.Test* %E) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %mul11 = mul nsw i32 %vA1, %vB0\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vC0 = load i32, i32* %C0, align 4\n"
> +      "  %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vD0 = load i32, i32* %D0, align 4\n"
> +      "  %mul12 = mul nsw i32 %vC0, %vD0\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %mul21 = mul nsw i32 %vB1, %vA0\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vC1 = load i32, i32* %C1, align 4\n"
> +      "  %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vD1 = load i32, i32* %D1, align 4\n"
> +      "  %mul22 = mul nsw i32 %vD1, %vC1\n"
> +      "  %add1 = add nsw i32 %mul11, %mul12\n"
> +      "  %add2 = add nsw i32 %mul22, %mul21\n"
> +      "  %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add1, i32* %E0, align 4\n"
> +      "  %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add2, i32* %E1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x3");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> +  VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
> +  VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
> +
> +  auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
> +
> + // FIXME Need to select better first value for lane0.
> + EXPECT_FALSE(Slp.isCompletelySLP());
> +}
> +
> +TEST_F(VPlanSlpTest, testSlpReorder_4) {
> + LLVMContext Ctx;
> +  const char *ModuleString =
> +      "%struct.Test = type { i32, i32 }\n"
> +      "define void @add_x3(%struct.Test* %A, %struct.Test* %B, %struct.Test* "
> +      "%C, %struct.Test* %D, %struct.Test* %E) {\n"
> +      "entry:\n"
> +      "  br label %for.body\n"
> +      "for.body:                                         ; preds = %for.body, "
> +      "%entry\n"
> +      "  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\n"
> +      "  %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vA0 = load i32, i32* %A0, align 4\n"
> +      "  %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vB0 = load i32, i32* %B0, align 4\n"
> +      "  %mul11 = mul nsw i32 %vA0, %vB0\n"
> +      "  %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vC0 = load i32, i32* %C0, align 4\n"
> +      "  %D0 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  %vD0 = load i32, i32* %D0, align 4\n"
> +      "  %mul12 = mul nsw i32 %vC0, %vD0\n"
> +      "  %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vA1 = load i32, i32* %A1, align 4\n"
> +      "  %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vB1 = load i32, i32* %B1, align 4\n"
> +      "  %mul21 = mul nsw i32 %vA1, %vB1\n"
> +      "  %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vC1 = load i32, i32* %C1, align 4\n"
> +      "  %D1 = getelementptr inbounds %struct.Test, %struct.Test* %D, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  %vD1 = load i32, i32* %D1, align 4\n"
> +      "  %mul22 = mul nsw i32 %vC1, %vD1\n"
> +      "  %add1 = add nsw i32 %mul11, %mul12\n"
> +      "  %add2 = add nsw i32 %mul22, %mul21\n"
> +      "  %E0 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 0\n"
> +      "  store i32 %add1, i32* %E0, align 4\n"
> +      "  %E1 = getelementptr inbounds %struct.Test, %struct.Test* %E, i64 "
> +      "%indvars.iv, i32 1\n"
> +      "  store i32 %add2, i32* %E1, align 4\n"
> +      "  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> +      "  %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> +      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> +      "for.cond.cleanup:                                 ; preds = %for.body\n"
> +      "  ret void\n"
> +      "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x3");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> + VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 24));
> + VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 26));
> +
> + checkReorderExample(
> + Store1, Store2, Body,
> + getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan));
> +}
> +
> +// Make sure we do not combine instructions with operands in different BBs.
> +TEST_F(VPlanSlpTest, testInstrsInDifferentBBs) {
> + const char *ModuleString =
> + "%struct.Test = type { i32, i32 }\n"
> + "%struct.Test3 = type { i32, i32, i32 }\n"
> + "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> + "define void @add_x2(%struct.Test* nocapture readonly %A,
> %struct.Test* "
> + "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> + "entry:\n"
> + " br label %for.body\n"
> + "for.body: ; preds =
> %for.body, "
> + "%entry\n"
> + " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next,
> %for.body ]\n"
> + " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vA0 = load i32, i32* %A0, align 4\n"
> + " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vB0 = load i32, i32* %B0, align 4\n"
> + " %add0 = add nsw i32 %vA0, %vB0\n"
> + " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vA1 = load i32, i32* %A1, align 4\n"
> + " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 1\n"
> + " br label %bb2\n"
> + "bb2:\n"
> + " %vB1 = load i32, i32* %B1, align 4\n"
> + " %add1 = add nsw i32 %vA1, %vB1\n"
> + " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 0\n"
> + " store i32 %add0, i32* %C0, align 4\n"
> + " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 1\n"
> + " store i32 %add1, i32* %C1, align 4\n"
> + " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> + " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> + " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> + "for.cond.cleanup: ; preds =
> %for.body\n"
> + " ret void\n"
> + "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> + auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> + VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
> +
> + VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
> + VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 5));
> +
> + VPlanSlp Slp(VPIAI, *BB2);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
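> + // No bundle was built, so the widest bundle width stays at 0 bits.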
> + EXPECT_EQ(0u, Slp.getWidestBundleBits());
> +}
> +
> +// Make sure we do not combine instructions when the root stores are in a
> +// different BB than their operands.
> +TEST_F(VPlanSlpTest, testInstrsInDifferentBBs2) {
> + const char *ModuleString =
> + "%struct.Test = type { i32, i32 }\n"
> + "%struct.Test3 = type { i32, i32, i32 }\n"
> + "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> + "define void @add_x2(%struct.Test* nocapture readonly %A,
> %struct.Test* "
> + "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> + "entry:\n"
> + " br label %for.body\n"
> + "for.body: ; preds =
> %for.body, "
> + "%entry\n"
> + " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next,
> %for.body ]\n"
> + " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vA0 = load i32, i32* %A0, align 4\n"
> + " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vB0 = load i32, i32* %B0, align 4\n"
> + " %add0 = add nsw i32 %vA0, %vB0\n"
> + " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vA1 = load i32, i32* %A1, align 4\n"
> + " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vB1 = load i32, i32* %B1, align 4\n"
> + " %add1 = add nsw i32 %vA1, %vB1\n"
> + " br label %bb2\n"
> + "bb2:\n"
> + " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 0\n"
> + " store i32 %add0, i32* %C0, align 4\n"
> + " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 1\n"
> + " store i32 %add1, i32* %C1, align 4\n"
> + " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> + " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> + " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> + "for.cond.cleanup: ; preds =
> %for.body\n"
> + " ret void\n"
> + "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> + auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> + VPBasicBlock *BB2 = Body->getSingleSuccessor()->getEntryBasicBlock();
> +
> + VPInstruction *Store1 = cast<VPInstruction>(&*std::next(BB2->begin(), 1));
> + VPInstruction *Store2 = cast<VPInstruction>(&*std::next(BB2->begin(), 3));
> +
> + VPlanSlp Slp(VPIAI, *BB2);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
> + EXPECT_EQ(0u, Slp.getWidestBundleBits());
> +}
> +
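> +// Atomic operations must not be combined: %vA0 is a monotonic atomic load,
> +// so bundling it with the plain loads would drop its ordering constraint
> +// and the tree cannot be completely SLP.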
> +TEST_F(VPlanSlpTest, testSlpAtomicLoad) {
> + const char *ModuleString =
> + "%struct.Test = type { i32, i32 }\n"
> + "%struct.Test3 = type { i32, i32, i32 }\n"
> + "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> + "define void @add_x2(%struct.Test* nocapture readonly %A,
> %struct.Test* "
> + "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> + "entry:\n"
> + " br label %for.body\n"
> + "for.body: ; preds =
> %for.body, "
> + "%entry\n"
> + " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next,
> %for.body ]\n"
> + " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vA0 = load atomic i32, i32* %A0 monotonic, align 4\n"
> + " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vB0 = load i32, i32* %B0, align 4\n"
> + " %add0 = add nsw i32 %vA0, %vB0\n"
> + " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vA1 = load i32, i32* %A1, align 4\n"
> + " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vB1 = load i32, i32* %B1, align 4\n"
> + " %add1 = add nsw i32 %vA1, %vB1\n"
> + " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 0\n"
> + " store i32 %add0, i32* %C0, align 4\n"
> + " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 1\n"
> + " store i32 %add1, i32* %C1, align 4\n"
> + " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> + " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> + " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> + "for.cond.cleanup: ; preds =
> %for.body\n"
> + " ret void\n"
> + "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> + auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> + VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
> + VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + EXPECT_EQ(nullptr, Slp.buildGraph(StoreRoot));
> + EXPECT_FALSE(Slp.isCompletelySLP());
> +}
> +
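> +// Same as above, but with the constraint on the store side: the monotonic
> +// atomic store to %C0 must not be merged into a combined store.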
> +TEST_F(VPlanSlpTest, testSlpAtomicStore) {
> + const char *ModuleString =
> + "%struct.Test = type { i32, i32 }\n"
> + "%struct.Test3 = type { i32, i32, i32 }\n"
> + "%struct.Test4xi8 = type { i8, i8, i8 }\n"
> + "define void @add_x2(%struct.Test* nocapture readonly %A,
> %struct.Test* "
> + "nocapture readonly %B, %struct.Test* nocapture %C) {\n"
> + "entry:\n"
> + " br label %for.body\n"
> + "for.body: ; preds =
> %for.body, "
> + "%entry\n"
> + " %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next,
> %for.body ]\n"
> + " %A0 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vA0 = load i32, i32* %A0, align 4\n"
> + " %B0 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 0\n"
> + " %vB0 = load i32, i32* %B0, align 4\n"
> + " %add0 = add nsw i32 %vA0, %vB0\n"
> + " %A1 = getelementptr inbounds %struct.Test, %struct.Test* %A, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vA1 = load i32, i32* %A1, align 4\n"
> + " %B1 = getelementptr inbounds %struct.Test, %struct.Test* %B, i64
> "
> + "%indvars.iv, i32 1\n"
> + " %vB1 = load i32, i32* %B1, align 4\n"
> + " %add1 = add nsw i32 %vA1, %vB1\n"
> + " %C0 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 0\n"
> + " store atomic i32 %add0, i32* %C0 monotonic, align 4\n"
> + " %C1 = getelementptr inbounds %struct.Test, %struct.Test* %C, i64
> "
> + "%indvars.iv, i32 1\n"
> + " store i32 %add1, i32* %C1, align 4\n"
> + " %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1\n"
> + " %exitcond = icmp eq i64 %indvars.iv.next, 1024\n"
> + " br i1 %exitcond, label %for.cond.cleanup, label %for.body\n"
> + "for.cond.cleanup: ; preds =
> %for.body\n"
> + " ret void\n"
> + "}\n";
> +
> + Module &M = parseModule(ModuleString);
> +
> + Function *F = M.getFunction("add_x2");
> + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor();
> + auto Plan = buildHCFG(LoopHeader);
> + auto VPIAI = getInterleavedAccessInfo(*F, LI->getLoopFor(LoopHeader), *Plan);
> +
> + VPBlockBase *Entry = Plan->getEntry()->getEntryBasicBlock();
> + EXPECT_NE(nullptr, Entry->getSingleSuccessor());
> + VPBasicBlock *Body = Entry->getSingleSuccessor()->getEntryBasicBlock();
> +
> + VPInstruction *Store1 = cast<VPInstruction>(&*std::next(Body->begin(), 12));
> + VPInstruction *Store2 = cast<VPInstruction>(&*std::next(Body->begin(), 14));
> +
> + VPlanSlp Slp(VPIAI, *Body);
> + SmallVector<VPValue *, 4> StoreRoot = {Store1, Store2};
> + Slp.buildGraph(StoreRoot);
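> + // The atomic store prevents combining the root stores.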
> + EXPECT_FALSE(Slp.isCompletelySLP());
> +}
> +
> +} // namespace
> +} // namespace llvm
>
>