[polly] 22c77f2 - [Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 10 05:25:18 PDT 2024
Author: Michael Kruse
Date: 2024-08-10T14:25:15+02:00
New Revision: 22c77f235416d137ea83875c16901fdf32b57159
URL: https://github.com/llvm/llvm-project/commit/22c77f235416d137ea83875c16901fdf32b57159
DIFF: https://github.com/llvm/llvm-project/commit/22c77f235416d137ea83875c16901fdf32b57159.diff
LOG: [Polly] Use separate DT/LI/SE for outlined subfn. NFC. (#102460)
DominatorTree, LoopInfo, and ScalarEvolution are function-level analyses
that expect to be called only on instructions and basic blocks of the
function they were originally created for. When Polly outlined a parallel
loop body into a separate function, it reused the same analyses, which
seemed to work until the new checks to be added in #101198.
This patch creates new analyses for the subfunctions. GenDT, GenLI, and
GenSE now refer to the analyses of the current region of code. Outside
of an outlined function, they refer to the same analysis as used for the
SCoP, but are substituted within an outlined function.
In addition to the cross-function queries of DT/LI/SE, we must not
create SCEVs that refer to a mix of expressions for old and generated
values. Currently, SCEVs themselves do not "remember" which
ScalarEvolution analysis they were created for, but mixing them is just
as unexpected as using DT/LI across function boundaries. Hence
`SCEVLoopAddRecRewriter` was combined into `ScopExpander`.
`SCEVLoopAddRecRewriter` only replaced induction variables but left
SCEVUnknowns to reference the old function. `SCEVParameterRewriter`
would have done so but its job was effectively superseded by
`ScopExpander`, and now also `SCEVLoopAddRecRewriter`. Some issues
persist but are marked with a FIXME in the code. Changing them would
possibly cause this patch to no longer be NFC.
Added:
Modified:
polly/include/polly/CodeGen/BlockGenerators.h
polly/include/polly/CodeGen/IslExprBuilder.h
polly/include/polly/CodeGen/IslNodeBuilder.h
polly/include/polly/CodeGen/LoopGenerators.h
polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
polly/include/polly/CodeGen/LoopGeneratorsKMP.h
polly/include/polly/Support/ScopHelper.h
polly/lib/CodeGen/BlockGenerators.cpp
polly/lib/CodeGen/IslExprBuilder.cpp
polly/lib/CodeGen/IslNodeBuilder.cpp
polly/lib/CodeGen/LoopGeneratorsGOMP.cpp
polly/lib/CodeGen/LoopGeneratorsKMP.cpp
polly/lib/Support/ScopHelper.cpp
Removed:
################################################################################
diff --git a/polly/include/polly/CodeGen/BlockGenerators.h b/polly/include/polly/CodeGen/BlockGenerators.h
index 13c27328d8c7e1..4e2645468a7434 100644
--- a/polly/include/polly/CodeGen/BlockGenerators.h
+++ b/polly/include/polly/CodeGen/BlockGenerators.h
@@ -162,9 +162,14 @@ class BlockGenerator {
/// The dominator tree of this function.
DominatorTree &DT;
- /// The entry block of the current function.
- BasicBlock *EntryBB;
+ /// Relates to the region where the code is emitted into.
+ /// @{
+ DominatorTree *GenDT;
+ LoopInfo *GenLI;
+ ScalarEvolution *GenSE;
+ /// @}
+public:
/// Map to resolve scalar dependences for PHI operands and scalars.
///
/// When translating code that contains scalar dependences as they result from
@@ -298,6 +303,10 @@ class BlockGenerator {
/// Split @p BB to create a new one we can use to clone @p BB in.
BasicBlock *splitBB(BasicBlock *BB);
+ /// Change the function that code is emitted into.
+ void switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
+ LoopInfo *GenLI, ScalarEvolution *GenSE);
+
/// Copy the given basic block.
///
/// @param Stmt The statement to code generate.
diff --git a/polly/include/polly/CodeGen/IslExprBuilder.h b/polly/include/polly/CodeGen/IslExprBuilder.h
index 6842aaa456ac27..25f61be5787c13 100644
--- a/polly/include/polly/CodeGen/IslExprBuilder.h
+++ b/polly/include/polly/CodeGen/IslExprBuilder.h
@@ -124,6 +124,10 @@ class IslExprBuilder final {
llvm::ScalarEvolution &SE, llvm::DominatorTree &DT,
llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock);
+ /// Change the function that code is emitted into.
+ void switchGeneratedFunc(llvm::Function *GenFn, llvm::DominatorTree *GenDT,
+ llvm::LoopInfo *GenLI, llvm::ScalarEvolution *GenSE);
+
/// Create LLVM-IR for an isl_ast_expr[ession].
///
/// @param Expr The ast expression for which we generate LLVM-IR.
@@ -205,10 +209,15 @@ class IslExprBuilder final {
const llvm::DataLayout &DL;
llvm::ScalarEvolution &SE;
- llvm::DominatorTree &DT;
- llvm::LoopInfo &LI;
llvm::BasicBlock *StartBlock;
+ /// Relates to the region where the code is emitted into.
+ /// @{
+ llvm::DominatorTree *GenDT;
+ llvm::LoopInfo *GenLI;
+ llvm::ScalarEvolution *GenSE;
+ /// @}
+
llvm::Value *createOp(__isl_take isl_ast_expr *Expr);
llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr);
llvm::Value *createOpAccess(__isl_take isl_ast_expr *Expr);
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h
index 05f53d79d74a4f..81343af94b636e 100644
--- a/polly/include/polly/CodeGen/IslNodeBuilder.h
+++ b/polly/include/polly/CodeGen/IslNodeBuilder.h
@@ -72,7 +72,7 @@ class IslNodeBuilder {
BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap,
&ExprBuilder, StartBlock),
RegionGen(BlockGen), DL(DL), LI(LI), SE(SE), DT(DT),
- StartBlock(StartBlock) {}
+ StartBlock(StartBlock), GenDT(&DT), GenLI(&LI), GenSE(&SE) {}
virtual ~IslNodeBuilder() = default;
@@ -147,6 +147,13 @@ class IslNodeBuilder {
DominatorTree &DT;
BasicBlock *StartBlock;
+ /// Relates to the region where the code is emitted into.
+ /// @{
+ DominatorTree *GenDT;
+ LoopInfo *GenLI;
+ ScalarEvolution *GenSE;
+ /// @}
+
/// The current iteration of out-of-scop loops
///
/// This map provides for a given loop a llvm::Value that contains the current
@@ -246,18 +253,6 @@ class IslNodeBuilder {
SetVector<Value *> &Values,
SetVector<const Loop *> &Loops);
- /// Change the llvm::Value(s) used for code generation.
- ///
- /// When generating code certain values (e.g., references to induction
- /// variables or array base pointers) in the original code may be replaced by
- /// new values. This function allows to (partially) update the set of values
- /// used. A typical use case for this function is the case when we continue
- /// code generation in a subfunction/kernel function and need to explicitly
- /// pass down certain values.
- ///
- /// @param NewValues A map that maps certain llvm::Values to new llvm::Values.
- void updateValues(ValueMapT &NewValues);
-
/// Return the most up-to-date version of the llvm::Value for code generation.
/// @param Original The Value to check for an up to date version.
/// @returns A remapped `Value` from ValueMap, or `Original` if no mapping
diff --git a/polly/include/polly/CodeGen/LoopGenerators.h b/polly/include/polly/CodeGen/LoopGenerators.h
index 8ec75e69890c95..6076e5951fb0a5 100644
--- a/polly/include/polly/CodeGen/LoopGenerators.h
+++ b/polly/include/polly/CodeGen/LoopGenerators.h
@@ -55,7 +55,7 @@ extern int PollyChunkSize;
/// @param Builder The builder used to create the loop.
/// @param P A pointer to the pass that uses this function.
/// It is used to update analysis information.
-/// @param LI The loop info for the current function
+/// @param LI The loop info we need to update
/// @param DT The dominator tree we need to update
/// @param ExitBlock The block the loop will exit to.
/// @param Predicate The predicate used to generate the upper loop
@@ -128,11 +128,9 @@ llvm::DebugLoc createDebugLocForGeneratedCode(Function *F);
class ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
- ParallelLoopGenerator(PollyIRBuilder &Builder, LoopInfo &LI,
- DominatorTree &DT, const DataLayout &DL)
- : Builder(Builder), LI(LI), DT(DT),
- LongType(
- Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
+ ParallelLoopGenerator(PollyIRBuilder &Builder, const DataLayout &DL)
+ : Builder(Builder), LongType(Type::getIntNTy(Builder.getContext(),
+ DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()),
DLGenerated(createDebugLocForGeneratedCode(
Builder.GetInsertBlock()->getParent())) {}
@@ -164,11 +162,11 @@ class ParallelLoopGenerator {
/// The IR builder we use to create instructions.
PollyIRBuilder &Builder;
- /// The loop info of the current function we need to update.
- LoopInfo &LI;
+ /// The loop info for the generated subfunction.
+ std::unique_ptr<LoopInfo> SubFnLI;
- /// The dominance tree of the current function we need to update.
- DominatorTree &DT;
+ /// The dominance tree for the generated subfunction.
+ std::unique_ptr<DominatorTree> SubFnDT;
/// The type of a "long" on this hardware used for backend calls.
Type *LongType;
@@ -184,6 +182,12 @@ class ParallelLoopGenerator {
llvm::DebugLoc DLGenerated;
public:
+ /// Returns the DominatorTree for the generated subfunction.
+ DominatorTree *getCalleeDominatorTree() const { return SubFnDT.get(); }
+
+ /// Returns the LoopInfo for the generated subfunction.
+ LoopInfo *getCalleeLoopInfo() const { return SubFnLI.get(); }
+
/// Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
diff --git a/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
index 4cc4f394f36319..1bf6e6e2cbbb78 100644
--- a/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
+++ b/polly/include/polly/CodeGen/LoopGeneratorsGOMP.h
@@ -25,9 +25,8 @@ namespace polly {
class ParallelLoopGeneratorGOMP final : public ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
- ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, LoopInfo &LI,
- DominatorTree &DT, const DataLayout &DL)
- : ParallelLoopGenerator(Builder, LI, DT, DL) {}
+ ParallelLoopGeneratorGOMP(PollyIRBuilder &Builder, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, DL) {}
// The functions below may be used if one does not want to generate a
// specific OpenMP parallel loop, but generate individual parts of it
diff --git a/polly/include/polly/CodeGen/LoopGeneratorsKMP.h b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
index 245a63c7bae50f..f134857449c2df 100644
--- a/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
+++ b/polly/include/polly/CodeGen/LoopGeneratorsKMP.h
@@ -27,9 +27,8 @@ using llvm::GlobalVariable;
class ParallelLoopGeneratorKMP final : public ParallelLoopGenerator {
public:
/// Create a parallel loop generator for the current function.
- ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, LoopInfo &LI,
- DominatorTree &DT, const DataLayout &DL)
- : ParallelLoopGenerator(Builder, LI, DT, DL) {
+ ParallelLoopGeneratorKMP(PollyIRBuilder &Builder, const DataLayout &DL)
+ : ParallelLoopGenerator(Builder, DL) {
SourceLocationInfo = createSourceLocation();
}
diff --git a/polly/include/polly/Support/ScopHelper.h b/polly/include/polly/Support/ScopHelper.h
index 17480c5381c516..13852ecb18ee7c 100644
--- a/polly/include/polly/Support/ScopHelper.h
+++ b/polly/include/polly/Support/ScopHelper.h
@@ -36,6 +36,9 @@ namespace polly {
class Scop;
class ScopStmt;
+/// Same as llvm/Analysis/ScalarEvolutionExpressions.h
+using LoopToScevMapT = llvm::DenseMap<const llvm::Loop *, const llvm::SCEV *>;
+
/// Enumeration of assumptions Polly can take.
enum AssumptionKind {
ALIASING,
@@ -383,20 +386,24 @@ void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock,
/// as the call to SCEVExpander::expandCodeFor:
///
/// @param S The current Scop.
-/// @param SE The Scalar Evolution pass.
+/// @param SE The Scalar Evolution pass used by @p S.
+/// @param GenFn The function to generate code in. Can be the function of @p S.
+/// @param GenSE The Scalar Evolution pass for @p GenFn.
/// @param DL The module data layout.
/// @param Name The suffix added to the new instruction names.
/// @param E The expression for which code is actually generated.
/// @param Ty The type of the resulting code.
/// @param IP The insertion point for the new code.
/// @param VMap A remapping of values used in @p E.
+/// @param LoopMap A remapping of loops used in @p E.
/// @param RTCBB The last block of the RTC. Used to insert loop-invariant
/// instructions in rare cases.
llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
+ llvm::Function *GenFn, llvm::ScalarEvolution &GenSE,
const llvm::DataLayout &DL, const char *Name,
const llvm::SCEV *E, llvm::Type *Ty,
llvm::Instruction *IP, ValueMapT *VMap,
- llvm::BasicBlock *RTCBB);
+ LoopToScevMapT *LoopMap, llvm::BasicBlock *RTCBB);
/// Return the condition for the terminator @p TI.
///
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp
index f7c777b4e80118..004fa64c82db2b 100644
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -57,8 +57,8 @@ BlockGenerator::BlockGenerator(
PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT,
AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap,
ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock)
- : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT),
- EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
+ : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), GenDT(&DT),
+ GenLI(&LI), GenSE(&SE), ScalarMap(ScalarMap), EscapeMap(EscapeMap),
GlobalMap(GlobalMap), StartBlock(StartBlock) {}
Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
@@ -75,7 +75,6 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
if (isa<SCEVCouldNotCompute>(Scev))
return nullptr;
- const SCEV *NewScev = SCEVLoopAddRecRewriter::rewrite(Scev, LTS, SE);
ValueMapT VTV;
VTV.insert(BBMap.begin(), BBMap.end());
VTV.insert(GlobalMap.begin(), GlobalMap.end());
@@ -86,9 +85,9 @@ Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
assert(IP != Builder.GetInsertBlock()->end() &&
"Only instructions can be insert points for SCEVExpander");
- Value *Expanded =
- expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV,
- StartBlock->getSinglePredecessor());
+ Value *Expanded = expandCodeFor(
+ S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly", Scev,
+ Old->getType(), &*IP, &VTV, &LTS, StartBlock->getSinglePredecessor());
BBMap[Old] = Expanded;
return Expanded;
@@ -233,6 +232,8 @@ void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
return;
}
+ // FIXME: We will encounter "NewOperand" again if used twice. getNewValue()
+ // is meant to be called on old values only.
NewInst->replaceUsesOfWith(OldOperand, NewOperand);
}
@@ -410,7 +411,7 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(),
- &*Builder.GetInsertPoint(), &DT, &LI);
+ &*Builder.GetInsertPoint(), GenDT, GenLI);
CopyBB->setName("polly.stmt." + BB->getName());
return CopyBB;
}
@@ -431,11 +432,20 @@ BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB,
return CopyBB;
}
+void BlockGenerator::switchGeneratedFunc(Function *GenFn, DominatorTree *GenDT,
+ LoopInfo *GenLI,
+ ScalarEvolution *GenSE) {
+ assert(GenFn == GenDT->getRoot()->getParent());
+ assert(GenLI->getTopLevelLoops().empty() ||
+ GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
+ this->GenDT = GenDT;
+ this->GenLI = GenLI;
+ this->GenSE = GenSE;
+}
+
void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
ValueMapT &BBMap, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
- EntryBB = &CopyBB->getParent()->getEntryBlock();
-
// Block statements and the entry blocks of region statement are code
// generated from instruction lists. This allow us to optimize the
// instructions that belong to a certain scop statement. As the code
@@ -497,7 +507,7 @@ Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
Addr =
new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
- EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
+ BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
Addr->insertBefore(&*EntryBB->getFirstInsertionPt());
return Addr;
@@ -554,10 +564,6 @@ void BlockGenerator::generateScalarLoads(
auto *Address =
getImplicitAddress(*MA, getLoopForStmt(Stmt), LTS, BBMap, NewAccesses);
- assert((!isa<Instruction>(Address) ||
- DT.dominates(cast<Instruction>(Address)->getParent(),
- Builder.GetInsertBlock())) &&
- "Domination violation");
BBMap[MA->getAccessValue()] = Builder.CreateLoad(
MA->getElementType(), Address, Address->getName() + ".reload");
}
@@ -615,9 +621,9 @@ void BlockGenerator::generateConditionalExecution(
StringRef BlockName = HeadBlock->getName();
// Generate the conditional block.
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ DomTreeUpdater DTU(GenDT, DomTreeUpdater::UpdateStrategy::Eager);
SplitBlockAndInsertIfThen(Cond, &*Builder.GetInsertPoint(), false, nullptr,
- &DTU, &LI);
+ &DTU, GenLI);
BranchInst *Branch = cast<BranchInst>(HeadBlock->getTerminator());
BasicBlock *ThenBlock = Branch->getSuccessor(0);
BasicBlock *TailBlock = Branch->getSuccessor(1);
diff --git a/polly/lib/CodeGen/IslExprBuilder.cpp b/polly/lib/CodeGen/IslExprBuilder.cpp
index f40511e0273a26..aaafac14bf8065 100644
--- a/polly/lib/CodeGen/IslExprBuilder.cpp
+++ b/polly/lib/CodeGen/IslExprBuilder.cpp
@@ -42,10 +42,23 @@ IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder,
DominatorTree &DT, LoopInfo &LI,
BasicBlock *StartBlock)
: S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap),
- DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) {
+ DL(DL), SE(SE), StartBlock(StartBlock), GenDT(&DT), GenLI(&LI),
+ GenSE(&SE) {
OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr;
}
+void IslExprBuilder::switchGeneratedFunc(llvm::Function *GenFn,
+ llvm::DominatorTree *GenDT,
+ llvm::LoopInfo *GenLI,
+ llvm::ScalarEvolution *GenSE) {
+ assert(GenFn == GenDT->getRoot()->getParent());
+ assert(GenLI->getTopLevelLoops().empty() ||
+ GenFn == GenLI->getTopLevelLoops().front()->getHeader()->getParent());
+ this->GenDT = GenDT;
+ this->GenLI = GenLI;
+ this->GenSE = GenSE;
+}
+
void IslExprBuilder::setTrackOverflow(bool Enable) {
// If potential overflows are tracked always or never we ignore requests
// to change the behavior.
@@ -307,14 +320,12 @@ IslExprBuilder::createAccessAddress(__isl_take isl_ast_expr *Expr) {
const SCEV *DimSCEV = SAI->getDimensionSize(u);
- llvm::ValueToSCEVMapTy Map;
- for (auto &KV : GlobalMap)
- Map[KV.first] = SE.getSCEV(KV.second);
- DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map);
- Value *DimSize =
- expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(),
- &*Builder.GetInsertPoint(), nullptr,
- StartBlock->getSinglePredecessor());
+ // DimSize should be invariant to the SCoP, so no BBMap nor LoopToScev
+ // needed. But GlobalMap may contain SCoP-invariant vars.
+ Value *DimSize = expandCodeFor(
+ S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL, "polly",
+ DimSCEV, DimSCEV->getType(), &*Builder.GetInsertPoint(), &GlobalMap,
+ /*LoopMap*/ nullptr, StartBlock->getSinglePredecessor());
Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType());
@@ -602,10 +613,10 @@ IslExprBuilder::createOpBooleanConditional(__isl_take isl_ast_expr *Expr) {
auto InsertBB = Builder.GetInsertBlock();
auto InsertPoint = Builder.GetInsertPoint();
- auto NextBB = SplitBlock(InsertBB, &*InsertPoint, &DT, &LI);
+ auto NextBB = SplitBlock(InsertBB, &*InsertPoint, GenDT, GenLI);
BasicBlock *CondBB = BasicBlock::Create(Context, "polly.cond", F);
- LI.changeLoopFor(CondBB, LI.getLoopFor(InsertBB));
- DT.addNewBlock(CondBB, InsertBB);
+ GenLI->changeLoopFor(CondBB, GenLI->getLoopFor(InsertBB));
+ GenDT->addNewBlock(CondBB, InsertBB);
InsertBB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(InsertBB);
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index 8b2207ecbf362c..3f07f02038a1ec 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -30,10 +30,12 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -44,11 +46,13 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "isl/aff.h"
#include "isl/aff_type.h"
@@ -366,22 +370,6 @@ void IslNodeBuilder::getReferencesInSubtree(const isl::ast_node &For,
Values = ReplacedValues;
}
-void IslNodeBuilder::updateValues(ValueMapT &NewValues) {
- SmallPtrSet<Value *, 5> Inserted;
-
- for (const auto &I : IDToValue) {
- IDToValue[I.first] = NewValues[I.second];
- Inserted.insert(I.second);
- }
-
- for (const auto &I : NewValues) {
- if (Inserted.count(I.first))
- continue;
-
- ValueMap[I.first] = I.second;
- }
-}
-
Value *IslNodeBuilder::getLatestValue(Value *Original) const {
auto It = ValueMap.find(Original);
if (It == ValueMap.end())
@@ -488,10 +476,10 @@ void IslNodeBuilder::createForSequential(isl::ast_node_for For,
// If we can show that LB <Predicate> UB holds at least once, we can
// omit the GuardBB in front of the loop.
- bool UseGuardBB =
- !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB));
- IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock,
- Predicate, &Annotator, MarkParallel, UseGuardBB,
+ bool UseGuardBB = !GenSE->isKnownPredicate(Predicate, GenSE->getSCEV(ValueLB),
+ GenSE->getSCEV(ValueUB));
+ IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, *GenLI, *GenDT,
+ ExitBlock, Predicate, &Annotator, MarkParallel, UseGuardBB,
LoopVectorizerDisabled);
IDToValue[IteratorID.get()] = IV;
@@ -506,50 +494,6 @@ void IslNodeBuilder::createForSequential(isl::ast_node_for For,
SequentialLoops++;
}
-/// Remove the BBs contained in a (sub)function from the dominator tree.
-///
-/// This function removes the basic blocks that are part of a subfunction from
-/// the dominator tree. Specifically, when generating code it may happen that at
-/// some point the code generation continues in a new sub-function (e.g., when
-/// generating OpenMP code). The basic blocks that are created in this
-/// sub-function are then still part of the dominator tree of the original
-/// function, such that the dominator tree reaches over function boundaries.
-/// This is not only incorrect, but also causes crashes. This function now
-/// removes from the dominator tree all basic blocks that are dominated (and
-/// consequently reachable) from the entry block of this (sub)function.
-///
-/// FIXME: A LLVM (function or region) pass should not touch anything outside of
-/// the function/region it runs on. Hence, the pure need for this function shows
-/// that we do not comply to this rule. At the moment, this does not cause any
-/// issues, but we should be aware that such issues may appear. Unfortunately
-/// the current LLVM pass infrastructure does not allow to make Polly a module
-/// or call-graph pass to solve this issue, as such a pass would not have access
-/// to the per-function analyses passes needed by Polly. A future pass manager
-/// infrastructure is supposed to enable such kind of access possibly allowing
-/// us to create a cleaner solution here.
-///
-/// FIXME: Instead of adding the dominance information and then dropping it
-/// later on, we should try to just not add it in the first place. This requires
-/// some careful testing to make sure this does not break in interaction with
-/// the SCEVBuilder and SplitBlock which may rely on the dominator tree or
-/// which may try to update it.
-///
-/// @param F The function which contains the BBs to removed.
-/// @param DT The dominator tree from which to remove the BBs.
-static void removeSubFuncFromDomTree(Function *F, DominatorTree &DT) {
- DomTreeNode *N = DT.getNode(&F->getEntryBlock());
- std::vector<BasicBlock *> Nodes;
-
- // We can only remove an element from the dominator tree, if all its children
- // have been removed. To ensure this we obtain the list of nodes to remove
- // using a post-order tree traversal.
- for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I)
- Nodes.push_back(I->getBlock());
-
- for (BasicBlock *BB : Nodes)
- DT.eraseNode(BB);
-}
-
void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
isl_ast_node *Body;
isl_ast_expr *Init, *Inc, *Iterator, *UB;
@@ -619,31 +563,108 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
switch (PollyOmpBackend) {
case OpenMPBackend::GNU:
- ParallelLoopGenPtr.reset(
- new ParallelLoopGeneratorGOMP(Builder, LI, DT, DL));
+ ParallelLoopGenPtr.reset(new ParallelLoopGeneratorGOMP(Builder, DL));
break;
case OpenMPBackend::LLVM:
- ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, LI, DT, DL));
+ ParallelLoopGenPtr.reset(new ParallelLoopGeneratorKMP(Builder, DL));
break;
}
IV = ParallelLoopGenPtr->createParallelLoop(
ValueLB, ValueUB, ValueInc, SubtreeValues, NewValues, &LoopBody);
BasicBlock::iterator AfterLoop = Builder.GetInsertPoint();
- Builder.SetInsertPoint(&*LoopBody);
// Remember the parallel subfunction
- ParallelSubfunctions.push_back(LoopBody->getFunction());
-
- // Save the current values.
- auto ValueMapCopy = ValueMap;
+ Function *SubFn = LoopBody->getFunction();
+ ParallelSubfunctions.push_back(SubFn);
+
+ // We start working on the outlined function. Since DominatorTree/LoopInfo are
+ // not inter-procedural passes, we temporarily switch them out. Save the
+ // old ones first.
+ Function *CallerFn = Builder.GetInsertBlock()->getParent();
+ DominatorTree *CallerDT = GenDT;
+ LoopInfo *CallerLI = GenLI;
+ ScalarEvolution *CallerSE = GenSE;
+ ValueMapT CallerGlobals = ValueMap;
IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
- updateValues(NewValues);
+ // Get the analyses for the subfunction. ParallelLoopGenerator already created
+ // DominatorTree and LoopInfo for us.
+ DominatorTree *SubDT = ParallelLoopGenPtr->getCalleeDominatorTree();
+ LoopInfo *SubLI = ParallelLoopGenPtr->getCalleeLoopInfo();
+
+ // Create TargetLibraryInfo, AssumptionCache, and ScalarEvolution ourselves.
+ // TODO: Ideally, we would use the pass manager's TargetLibraryInfoPass and
+ // AssumptionAnalysis instead of our own. They contain more target-specific
+ // information than we have available here: TargetLibraryInfoImpl can be a
+ // derived class determined by TargetMachine, AssumptionCache can be
+ // configured using a TargetTransformInfo object also derived from
+ // TargetMachine.
+ TargetLibraryInfoImpl BaselineInfoImpl(
+ Triple(SubFn->getParent()->getTargetTriple()));
+ TargetLibraryInfo CalleeTLI(BaselineInfoImpl, SubFn);
+ AssumptionCache CalleeAC(*SubFn);
+ std::unique_ptr<ScalarEvolution> SubSE = std::make_unique<ScalarEvolution>(
+ *SubFn, CalleeTLI, CalleeAC, *SubDT, *SubLI);
+
+ // Switch to the subfunction
+ GenDT = SubDT;
+ GenLI = SubLI;
+ GenSE = SubSE.get();
+ BlockGen.switchGeneratedFunc(SubFn, GenDT, GenLI, GenSE);
+ ExprBuilder.switchGeneratedFunc(SubFn, GenDT, GenLI, GenSE);
+ Builder.SetInsertPoint(&*LoopBody);
+
+ // Update the ValueMap to use instructions in the subfunction. Note that
+ // "GlobalMap" used in BlockGenerator/IslExprBuilder is a reference to this
+ // ValueMap.
+ for (auto &[OldVal, NewVal] : ValueMap) {
+ NewVal = NewValues.lookup(NewVal);
+
+ // Clean-up any value that getReferencesInSubtree thinks we do not need.
+ // DenseMap::erase only writes a tombstone (and destroys OldVal/NewVal), so
+ // does not invalidate our iterator.
+ if (!NewVal)
+ ValueMap.erase(OldVal);
+ }
+
+ // This is for NewVals that do not appear in ValueMap (such as SCoP-invariant
+ // values whose original value can be reused as long as we are in the same
+ // function). No need to map the others.
+ for (auto &[NewVal, NewNewVal] : NewValues) {
+ if (Instruction *NewValInst = dyn_cast<Instruction>((Value *)NewVal)) {
+ if (S.contains(NewValInst))
+ continue;
+ assert(NewValInst->getFunction() == &S.getFunction());
+ }
+ assert(!ValueMap.contains(NewVal));
+ ValueMap[NewVal] = NewNewVal;
+ }
+
+ // Also update the IDToValue map to use instructions from the subfunction.
+ for (auto &[OldVal, NewVal] : IDToValue) {
+ NewVal = NewValues.lookup(NewVal);
+ assert(NewVal);
+ }
IDToValue[IteratorID] = IV;
- ValueMapT NewValuesReverse;
+#ifndef NDEBUG
+ // Check whether the maps now exclusively refer to SubFn values.
+ for (auto &[OldVal, SubVal] : ValueMap) {
+ Instruction *SubInst = dyn_cast<Instruction>((Value *)SubVal);
+ assert(SubInst->getFunction() == SubFn &&
+ "Instructions from outside the subfn cannot be accessed within the "
+ "subfn");
+ }
+ for (auto &[Id, SubVal] : IDToValue) {
+ Instruction *SubInst = dyn_cast<Instruction>((Value *)SubVal);
+ assert(SubInst->getFunction() == SubFn &&
+ "Instructions from outside the subfn cannot be accessed within the "
+ "subfn");
+ }
+#endif
+ ValueMapT NewValuesReverse;
for (auto P : NewValues)
NewValuesReverse[P.second] = P.first;
@@ -652,12 +673,16 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
create(Body);
Annotator.resetAlternativeAliasBases();
- // Restore the original values.
- ValueMap = ValueMapCopy;
- IDToValue = IDToValueCopy;
+ // Resume working on the caller function.
+ GenDT = CallerDT;
+ GenLI = CallerLI;
+ GenSE = CallerSE;
+ IDToValue = std::move(IDToValueCopy);
+ ValueMap = std::move(CallerGlobals);
+ ExprBuilder.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
+ BlockGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
Builder.SetInsertPoint(&*AfterLoop);
- removeSubFuncFromDomTree((*LoopBody).getParent()->getParent(), DT);
for (const Loop *L : Loops)
OutsideLoopIterations.erase(L);
@@ -686,21 +711,21 @@ void IslNodeBuilder::createIf(__isl_take isl_ast_node *If) {
LLVMContext &Context = F->getContext();
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
- &*Builder.GetInsertPoint(), &DT, &LI);
+ &*Builder.GetInsertPoint(), GenDT, GenLI);
CondBB->setName("polly.cond");
- BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
+ BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), GenDT, GenLI);
MergeBB->setName("polly.merge");
BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
BasicBlock *ElseBB = BasicBlock::Create(Context, "polly.else", F);
- DT.addNewBlock(ThenBB, CondBB);
- DT.addNewBlock(ElseBB, CondBB);
- DT.changeImmediateDominator(MergeBB, CondBB);
+ GenDT->addNewBlock(ThenBB, CondBB);
+ GenDT->addNewBlock(ElseBB, CondBB);
+ GenDT->changeImmediateDominator(MergeBB, CondBB);
- Loop *L = LI.getLoopFor(CondBB);
+ Loop *L = GenLI->getLoopFor(CondBB);
if (L) {
- L->addBasicBlockToLoop(ThenBB, LI);
- L->addBasicBlockToLoop(ElseBB, LI);
+ L->addBasicBlockToLoop(ThenBB, *GenLI);
+ L->addBasicBlockToLoop(ElseBB, *GenLI);
}
CondBB->getTerminator()->eraseFromParent();
@@ -1088,19 +1113,19 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA,
Cond = Builder.CreateIsNotNull(Cond);
BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(),
- &*Builder.GetInsertPoint(), &DT, &LI);
+ &*Builder.GetInsertPoint(), GenDT, GenLI);
CondBB->setName("polly.preload.cond");
- BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), &DT, &LI);
+ BasicBlock *MergeBB = SplitBlock(CondBB, &CondBB->front(), GenDT, GenLI);
MergeBB->setName("polly.preload.merge");
Function *F = Builder.GetInsertBlock()->getParent();
LLVMContext &Context = F->getContext();
BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F);
- DT.addNewBlock(ExecBB, CondBB);
- if (Loop *L = LI.getLoopFor(CondBB))
- L->addBasicBlockToLoop(ExecBB, LI);
+ GenDT->addNewBlock(ExecBB, CondBB);
+ if (Loop *L = GenLI->getLoopFor(CondBB))
+ L->addBasicBlockToLoop(ExecBB, *GenLI);
auto *CondBBTerminator = CondBB->getTerminator();
Builder.SetInsertPoint(CondBBTerminator);
@@ -1326,7 +1351,7 @@ bool IslNodeBuilder::preloadInvariantLoads() {
return true;
BasicBlock *PreLoadBB = SplitBlock(Builder.GetInsertBlock(),
- &*Builder.GetInsertPoint(), &DT, &LI);
+ &*Builder.GetInsertPoint(), GenDT, GenLI);
PreLoadBB->setName("polly.preload.begin");
Builder.SetInsertPoint(&PreLoadBB->front());
@@ -1375,8 +1400,10 @@ Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) {
assert(Builder.GetInsertBlock()->end() != Builder.GetInsertPoint() &&
"Insert location points after last valid instruction");
Instruction *InsertLocation = &*Builder.GetInsertPoint();
- return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(),
- InsertLocation, &ValueMap,
+
+ return expandCodeFor(S, SE, Builder.GetInsertBlock()->getParent(), *GenSE, DL,
+ "polly", Expr, Expr->getType(), InsertLocation,
+ &ValueMap, /*LoopToScevMap*/ nullptr,
StartBlock->getSinglePredecessor());
}
diff --git a/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp b/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp
index e7512c1f33f610..cd440b28202e6d 100644
--- a/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp
+++ b/polly/lib/CodeGen/LoopGeneratorsGOMP.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/LoopGeneratorsGOMP.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
@@ -108,21 +109,20 @@ ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
Function *SubFn = createSubFnDefinition();
LLVMContext &Context = SubFn->getContext();
- // Store the previous basic block.
- BasicBlock *PrevBB = Builder.GetInsertBlock();
-
// Create basic blocks.
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
+ SubFnDT = std::make_unique<DominatorTree>(*SubFn);
+ SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
+
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
BasicBlock *CheckNextBB =
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
BasicBlock *PreHeaderBB =
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
- DT.addNewBlock(HeaderBB, PrevBB);
- DT.addNewBlock(ExitBB, HeaderBB);
- DT.addNewBlock(CheckNextBB, HeaderBB);
- DT.addNewBlock(PreHeaderBB, HeaderBB);
+ SubFnDT->addNewBlock(ExitBB, HeaderBB);
+ SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
+ SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
@@ -155,8 +155,8 @@ ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
BasicBlock *AfterBB;
Value *IV =
- createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
- nullptr, true, /* UseGuard */ false);
+ createLoop(LB, UB, Stride, Builder, *SubFnLI, *SubFnDT, AfterBB,
+ ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
@@ -167,6 +167,10 @@ ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
Builder.SetInsertPoint(&*LoopBody);
+ // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
+ // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
+ // from scratch since it is not needed here directly.
+
return std::make_tuple(IV, SubFn);
}
diff --git a/polly/lib/CodeGen/LoopGeneratorsKMP.cpp b/polly/lib/CodeGen/LoopGeneratorsKMP.cpp
index b3af7b14f47808..4ec5afe6aa6317 100644
--- a/polly/lib/CodeGen/LoopGeneratorsKMP.cpp
+++ b/polly/lib/CodeGen/LoopGeneratorsKMP.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/LoopGeneratorsKMP.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
@@ -135,21 +136,20 @@ ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
Function *SubFn = createSubFnDefinition();
LLVMContext &Context = SubFn->getContext();
- // Store the previous basic block.
- BasicBlock *PrevBB = Builder.GetInsertBlock();
-
// Create basic blocks.
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
+ SubFnDT = std::make_unique<DominatorTree>(*SubFn);
+ SubFnLI = std::make_unique<LoopInfo>(*SubFnDT);
+
BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
BasicBlock *CheckNextBB =
BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
BasicBlock *PreHeaderBB =
BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
- DT.addNewBlock(HeaderBB, PrevBB);
- DT.addNewBlock(ExitBB, HeaderBB);
- DT.addNewBlock(CheckNextBB, HeaderBB);
- DT.addNewBlock(PreHeaderBB, HeaderBB);
+ SubFnDT->addNewBlock(ExitBB, HeaderBB);
+ SubFnDT->addNewBlock(CheckNextBB, HeaderBB);
+ SubFnDT->addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
@@ -291,8 +291,8 @@ ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
Builder.CreateBr(CheckNextBB);
Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
BasicBlock *AfterBB;
- Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, LI, DT, AfterBB,
- ICmpInst::ICMP_SLE, nullptr, true,
+ Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, *SubFnLI,
+ *SubFnDT, AfterBB, ICmpInst::ICMP_SLE, nullptr, true,
/* UseGuard */ false);
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
@@ -307,6 +307,10 @@ ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
Builder.CreateRetVoid();
Builder.SetInsertPoint(&*LoopBody);
+ // FIXME: Call SubFnDT->verify() and SubFnLI->verify() to check that the
+ // DominatorTree/LoopInfo has been created correctly. Alternatively, recreate
+ // from scratch since it is not needed here directly.
+
return std::make_tuple(IV, SubFn);
}
diff --git a/polly/lib/Support/ScopHelper.cpp b/polly/lib/Support/ScopHelper.cpp
index 24c7011b06de93..754bf50e2911f1 100644
--- a/polly/lib/Support/ScopHelper.cpp
+++ b/polly/lib/Support/ScopHelper.cpp
@@ -228,6 +228,22 @@ void polly::recordAssumption(polly::RecordedAssumptionsTy *RecordedAssumptions,
RecordedAssumptions->push_back({Kind, Sign, Set, Loc, BB, RTC});
}
+/// ScopExpander generates IR for the value of a SCEV that represents a value
+/// from a SCoP.
+///
+/// IMPORTANT: There are two ScalarEvolutions at play here. First, the SE that
+/// was used to analyze the original SCoP (not actually referenced anywhere
+/// here, but passed as argument to make the distinction clear). Second, GenSE
+/// which is the SE for the function that the code is emitted into. SE and GenSE
+/// may be different when the generated code is to be emitted into an outlined
+/// function, e.g. for a parallel loop. That is, each SCEV is to be used only by
+/// the SE that "owns" it and ScopExpander handles the translation between them.
+/// The SCEVVisitor methods are only to be called on SCEVs of the original SE.
+/// Their job is to create a new SCEV for GenSE. The nested SCEVExpander is to
+/// be used only with SCEVs belonging to GenSE. Currently SCEVs do not store a
+/// reference to the ScalarEvolution they belong to, so a mixup does not
+/// immediately cause a crash but certainly is a violation of its interface.
+///
/// The SCEVExpander will __not__ generate any code for an existing SDiv/SRem
/// instruction but just use it, if it is referenced as a SCEVUnknown. We want
/// however to generate new code if the instruction is in the analyzed region
@@ -237,19 +253,19 @@ void polly::recordAssumption(polly::RecordedAssumptionsTy *RecordedAssumptions,
struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
friend struct SCEVVisitor<ScopExpander, const SCEV *>;
- explicit ScopExpander(const Region &R, ScalarEvolution &SE,
- const DataLayout &DL, const char *Name, ValueMapT *VMap,
- BasicBlock *RTCBB)
- : Expander(SE, DL, Name, /*PreserveLCSSA=*/false), SE(SE), Name(Name),
- R(R), VMap(VMap), RTCBB(RTCBB) {}
+ explicit ScopExpander(const Region &R, ScalarEvolution &SE, Function *GenFn,
+ ScalarEvolution &GenSE, const DataLayout &DL,
+ const char *Name, ValueMapT *VMap,
+ LoopToScevMapT *LoopMap, BasicBlock *RTCBB)
+ : Expander(GenSE, DL, Name, /*PreserveLCSSA=*/false), Name(Name), R(R),
+ VMap(VMap), LoopMap(LoopMap), RTCBB(RTCBB), GenSE(GenSE), GenFn(GenFn) {
+ }
- Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) {
- // If we generate code in the region we will immediately fall back to the
- // SCEVExpander, otherwise we will stop at all unknowns in the SCEV and if
- // needed replace them by copies computed in the entering block.
- if (!R.contains(I))
- E = visit(E);
- return Expander.expandCodeFor(E, Ty, I);
+ Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *IP) {
+ assert(isInGenRegion(IP) &&
+ "ScopExpander assumes to be applied to generated code region");
+ const SCEV *GenE = visit(E);
+ return Expander.expandCodeFor(GenE, Ty, IP);
}
const SCEV *visit(const SCEV *E) {
@@ -265,16 +281,32 @@ struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
private:
SCEVExpander Expander;
- ScalarEvolution &SE;
const char *Name;
const Region &R;
ValueMapT *VMap;
+ LoopToScevMapT *LoopMap;
BasicBlock *RTCBB;
DenseMap<const SCEV *, const SCEV *> SCEVCache;
+ ScalarEvolution &GenSE;
+ Function *GenFn;
+
+ /// Is the instruction part of the original SCoP (in contrast to be located in
+ /// the code-generated region)?
+ bool isInOrigRegion(Instruction *Inst) {
+ Function *Fn = R.getEntry()->getParent();
+ bool isInOrigRegion = Inst->getFunction() == Fn && R.contains(Inst);
+ assert((isInOrigRegion || GenFn == Inst->getFunction()) &&
+ "Instruction expected to be either in the SCoP or the translated "
+ "region");
+ return isInOrigRegion;
+ }
+
+ bool isInGenRegion(Instruction *Inst) { return !isInOrigRegion(Inst); }
+
const SCEV *visitGenericInst(const SCEVUnknown *E, Instruction *Inst,
Instruction *IP) {
- if (!Inst || !R.contains(Inst))
+ if (!Inst || isInGenRegion(Inst))
return E;
assert(!Inst->mayThrow() && !Inst->mayReadOrWriteMemory() &&
@@ -282,15 +314,15 @@ struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
auto *InstClone = Inst->clone();
for (auto &Op : Inst->operands()) {
- assert(SE.isSCEVable(Op->getType()));
- auto *OpSCEV = SE.getSCEV(Op);
+ assert(GenSE.isSCEVable(Op->getType()));
+ auto *OpSCEV = GenSE.getSCEV(Op);
auto *OpClone = expandCodeFor(OpSCEV, Op->getType(), IP);
InstClone->replaceUsesOfWith(Op, OpClone);
}
InstClone->setName(Name + Inst->getName());
InstClone->insertBefore(IP);
- return SE.getSCEV(InstClone);
+ return GenSE.getSCEV(InstClone);
}
const SCEV *visitUnknown(const SCEVUnknown *E) {
@@ -298,19 +330,27 @@ struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
// If a value mapping was given try if the underlying value is remapped.
Value *NewVal = VMap ? VMap->lookup(E->getValue()) : nullptr;
if (NewVal) {
- auto *NewE = SE.getSCEV(NewVal);
+ auto *NewE = GenSE.getSCEV(NewVal);
// While the mapped value might be different the SCEV representation might
// not be. To this end we will check before we go into recursion here.
+ // FIXME: SCEVVisitor must only visit SCEVs that belong to the original
+ // SE. This calls it on SCEVs that belong to GenSE.
if (E != NewE)
return visit(NewE);
}
Instruction *Inst = dyn_cast<Instruction>(E->getValue());
Instruction *IP;
- if (Inst && !R.contains(Inst))
+ if (Inst && isInGenRegion(Inst))
IP = Inst;
- else if (Inst && RTCBB->getParent() == Inst->getFunction())
+ else if (R.getEntry()->getParent() != GenFn) {
+ // RTCBB is in the original function, but we are generating for a
+ // subfunction so we cannot emit to RTCBB. Usually, we land here only
+ // because E->getValue() is not an instruction but a global or constant
+ // which do not need to emit anything.
+ IP = GenFn->getEntryBlock().getTerminator();
+ } else if (Inst && RTCBB->getParent() == Inst->getFunction())
IP = RTCBB->getTerminator();
else
IP = RTCBB->getParent()->getEntryBlock().getTerminator();
@@ -319,11 +359,11 @@ struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
Inst->getOpcode() != Instruction::SDiv))
return visitGenericInst(E, Inst, IP);
- const SCEV *LHSScev = SE.getSCEV(Inst->getOperand(0));
- const SCEV *RHSScev = SE.getSCEV(Inst->getOperand(1));
+ const SCEV *LHSScev = GenSE.getSCEV(Inst->getOperand(0));
+ const SCEV *RHSScev = GenSE.getSCEV(Inst->getOperand(1));
- if (!SE.isKnownNonZero(RHSScev))
- RHSScev = SE.getUMaxExpr(RHSScev, SE.getConstant(E->getType(), 1));
+ if (!GenSE.isKnownNonZero(RHSScev))
+ RHSScev = GenSE.getUMaxExpr(RHSScev, GenSE.getConstant(E->getType(), 1));
Value *LHS = expandCodeFor(LHSScev, E->getType(), IP);
Value *RHS = expandCodeFor(RHSScev, E->getType(), IP);
@@ -331,89 +371,105 @@ struct ScopExpander final : SCEVVisitor<ScopExpander, const SCEV *> {
Inst =
BinaryOperator::Create((Instruction::BinaryOps)Inst->getOpcode(), LHS,
RHS, Inst->getName() + Name, IP->getIterator());
- return SE.getSCEV(Inst);
+ return GenSE.getSCEV(Inst);
}
- /// The following functions will just traverse the SCEV and rebuild it with
- /// the new operands returned by the traversal.
+ /// The following functions will just traverse the SCEV and rebuild it using
+ /// GenSE and the new operands returned by the traversal.
///
///{
const SCEV *visitConstant(const SCEVConstant *E) { return E; }
const SCEV *visitVScale(const SCEVVScale *E) { return E; }
const SCEV *visitPtrToIntExpr(const SCEVPtrToIntExpr *E) {
- return SE.getPtrToIntExpr(visit(E->getOperand()), E->getType());
+ return GenSE.getPtrToIntExpr(visit(E->getOperand()), E->getType());
}
const SCEV *visitTruncateExpr(const SCEVTruncateExpr *E) {
- return SE.getTruncateExpr(visit(E->getOperand()), E->getType());
+ return GenSE.getTruncateExpr(visit(E->getOperand()), E->getType());
}
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *E) {
- return SE.getZeroExtendExpr(visit(E->getOperand()), E->getType());
+ return GenSE.getZeroExtendExpr(visit(E->getOperand()), E->getType());
}
const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *E) {
- return SE.getSignExtendExpr(visit(E->getOperand()), E->getType());
+ return GenSE.getSignExtendExpr(visit(E->getOperand()), E->getType());
}
const SCEV *visitUDivExpr(const SCEVUDivExpr *E) {
auto *RHSScev = visit(E->getRHS());
- if (!SE.isKnownNonZero(RHSScev))
- RHSScev = SE.getUMaxExpr(RHSScev, SE.getConstant(E->getType(), 1));
- return SE.getUDivExpr(visit(E->getLHS()), RHSScev);
+ if (!GenSE.isKnownNonZero(RHSScev))
+ RHSScev = GenSE.getUMaxExpr(RHSScev, GenSE.getConstant(E->getType(), 1));
+ return GenSE.getUDivExpr(visit(E->getLHS()), RHSScev);
}
const SCEV *visitAddExpr(const SCEVAddExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getAddExpr(NewOps);
+ return GenSE.getAddExpr(NewOps);
}
const SCEV *visitMulExpr(const SCEVMulExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getMulExpr(NewOps);
+ return GenSE.getMulExpr(NewOps);
}
const SCEV *visitUMaxExpr(const SCEVUMaxExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getUMaxExpr(NewOps);
+ return GenSE.getUMaxExpr(NewOps);
}
const SCEV *visitSMaxExpr(const SCEVSMaxExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getSMaxExpr(NewOps);
+ return GenSE.getSMaxExpr(NewOps);
}
const SCEV *visitUMinExpr(const SCEVUMinExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getUMinExpr(NewOps);
+ return GenSE.getUMinExpr(NewOps);
}
const SCEV *visitSMinExpr(const SCEVSMinExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getSMinExpr(NewOps);
+ return GenSE.getSMinExpr(NewOps);
}
const SCEV *visitSequentialUMinExpr(const SCEVSequentialUMinExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getUMinExpr(NewOps, /*Sequential=*/true);
+ return GenSE.getUMinExpr(NewOps, /*Sequential=*/true);
}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
SmallVector<const SCEV *, 4> NewOps;
for (const SCEV *Op : E->operands())
NewOps.push_back(visit(Op));
- return SE.getAddRecExpr(NewOps, E->getLoop(), E->getNoWrapFlags());
+
+ const Loop *L = E->getLoop();
+ const SCEV *GenLRepl = LoopMap ? LoopMap->lookup(L) : nullptr;
+ if (!GenLRepl)
+ return GenSE.getAddRecExpr(NewOps, L, E->getNoWrapFlags());
+
+ // evaluateAtIteration replaces the SCEVAddRecExpr with a direct calculation.
+ const SCEV *Evaluated =
+ SCEVAddRecExpr::evaluateAtIteration(NewOps, GenLRepl, GenSE);
+
+ // FIXME: This emits a SCEV for GenSE (since GenLRepl will refer to the
+ // induction variable of a generated loop), so we should not use SCEVVisitor
+ // with it. However, it still contains references to the SCoP region.
+ return visit(Evaluated);
}
///}
};
-Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL,
- const char *Name, const SCEV *E, Type *Ty,
- Instruction *IP, ValueMapT *VMap,
+Value *polly::expandCodeFor(Scop &S, llvm::ScalarEvolution &SE,
+ llvm::Function *GenFn, ScalarEvolution &GenSE,
+ const DataLayout &DL, const char *Name,
+ const SCEV *E, Type *Ty, Instruction *IP,
+ ValueMapT *VMap, LoopToScevMapT *LoopMap,
BasicBlock *RTCBB) {
- ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap, RTCBB);
+ ScopExpander Expander(S.getRegion(), SE, GenFn, GenSE, DL, Name, VMap,
+ LoopMap, RTCBB);
return Expander.expandCodeFor(E, Ty, IP);
}
More information about the llvm-commits
mailing list