[polly] e470f92 - [Polly] Implement user-directed loop distribution/fission.
Michael Kruse via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 23 19:13:12 PDT 2021
Author: Michael Kruse
Date: 2021-09-23T21:11:01-05:00
New Revision: e470f9268a448fedea25289ec343f82ff52ccc36
URL: https://github.com/llvm/llvm-project/commit/e470f9268a448fedea25289ec343f82ff52ccc36
DIFF: https://github.com/llvm/llvm-project/commit/e470f9268a448fedea25289ec343f82ff52ccc36.diff
LOG: [Polly] Implement user-directed loop distribution/fission.
This is a simple version without the possibility to define distribute
points or followup-transformations. However, it is the first
transformation that has to check whether the transformation is correct.
It interprets the same metadata as the LoopDistribute pass.
Re-apply after revert in c7bcd72a38bcf99e03e4651ed5204d1a1f2bf695 with
fix: Take isBand out of #ifndef NDEBUG since it now is used
unconditionally.
Added:
polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll
Modified:
polly/include/polly/DependenceInfo.h
polly/include/polly/ManualOptimizer.h
polly/include/polly/ScheduleTreeTransform.h
polly/lib/Analysis/DependenceInfo.cpp
polly/lib/Transform/ManualOptimizer.cpp
polly/lib/Transform/ScheduleOptimizer.cpp
polly/lib/Transform/ScheduleTreeTransform.cpp
Removed:
################################################################################
diff --git a/polly/include/polly/DependenceInfo.h b/polly/include/polly/DependenceInfo.h
index a8b11191d619c..3d70ea2b74e54 100644
--- a/polly/include/polly/DependenceInfo.h
+++ b/polly/include/polly/DependenceInfo.h
@@ -124,6 +124,10 @@ struct Dependences {
/// dependences.
bool isValidSchedule(Scop &S, const StatementToIslMapTy &NewSchedules) const;
+ /// Return true if the schedule @p NewSched is a schedule for @p S that does
+ /// not violate any dependences.
+ bool isValidSchedule(Scop &S, isl::schedule NewSched) const;
+
/// Print the stored dependence information.
void print(llvm::raw_ostream &OS) const;
diff --git a/polly/include/polly/ManualOptimizer.h b/polly/include/polly/ManualOptimizer.h
index 066eb4d84c511..988926334eb1a 100644
--- a/polly/include/polly/ManualOptimizer.h
+++ b/polly/include/polly/ManualOptimizer.h
@@ -15,8 +15,13 @@
#include "isl/isl-noexceptions.h"
+namespace llvm {
+class OptimizationRemarkEmitter;
+}
+
namespace polly {
class Scop;
+struct Dependences;
/// Apply loop-transformation metadata.
///
@@ -30,7 +35,9 @@ class Scop;
/// @return The transformed schedule with all mark-nodes with loop
/// transformations applied. Returns NULL in case of an error or @p
/// Sched itself if no transformation has been applied.
-isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched);
+isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched,
+ const Dependences &D,
+ llvm::OptimizationRemarkEmitter *ORE);
} // namespace polly
#endif /* POLLY_MANUALOPTIMIZER_H */
diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h
index 5fd0d6ad4dd0c..e8685313c83c4 100644
--- a/polly/include/polly/ScheduleTreeTransform.h
+++ b/polly/include/polly/ScheduleTreeTransform.h
@@ -178,6 +178,9 @@ isl::schedule applyFullUnroll(isl::schedule_node BandToUnroll);
/// Replace the AST band @p BandToUnroll by a partially unrolled equivalent.
isl::schedule applyPartialUnroll(isl::schedule_node BandToUnroll, int Factor);
+/// Loop-distribute the band @p BandToFission as much as possible.
+isl::schedule applyMaxFission(isl::schedule_node BandToFission);
+
/// Build the desired set of partial tile prefixes.
///
/// We build a set of partial tile prefixes, which are prefixes of the vector
diff --git a/polly/lib/Analysis/DependenceInfo.cpp b/polly/lib/Analysis/DependenceInfo.cpp
index 709bce7ea3b60..0ac7ff1a14c0b 100644
--- a/polly/lib/Analysis/DependenceInfo.cpp
+++ b/polly/lib/Analysis/DependenceInfo.cpp
@@ -636,6 +636,19 @@ void Dependences::calculateDependences(Scop &S) {
LLVM_DEBUG(dump());
}
+bool Dependences::isValidSchedule(Scop &S, isl::schedule NewSched) const {
+ // TODO: Also check permutable/coincident flags.
+
+ StatementToIslMapTy NewSchedules;
+ for (auto NewMap : NewSched.get_map().get_map_list()) {
+ auto Stmt = reinterpret_cast<ScopStmt *>(
+ NewMap.get_tuple_id(isl::dim::in).get_user());
+ NewSchedules[Stmt] = NewMap;
+ }
+
+ return isValidSchedule(S, NewSchedules);
+}
+
bool Dependences::isValidSchedule(
Scop &S, const StatementToIslMapTy &NewSchedule) const {
if (LegalityCheckDisabled)
diff --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp
index 2a77f7d49ae22..2c05927582e28 100644
--- a/polly/lib/Transform/ManualOptimizer.cpp
+++ b/polly/lib/Transform/ManualOptimizer.cpp
@@ -11,11 +11,14 @@
//===----------------------------------------------------------------------===//
#include "polly/ManualOptimizer.h"
+#include "polly/DependenceInfo.h"
+#include "polly/Options.h"
#include "polly/ScheduleTreeTransform.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -25,6 +28,12 @@ using namespace polly;
using namespace llvm;
namespace {
+
+static cl::opt<bool> IgnoreDepcheck(
+ "polly-pragma-ignore-depcheck",
+ cl::desc("Skip the dependency check for pragma-based transformations"),
+ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
/// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument
/// instead of a Loop.
static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
@@ -48,6 +57,31 @@ static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
return TM_Unspecified;
}
+// Return the first DebugLoc in the list.
+static DebugLoc findFirstDebugLoc(MDNode *MD) {
+ if (MD) {
+ for (const MDOperand &X : drop_begin(MD->operands(), 1)) {
+ Metadata *A = X.get();
+ if (!isa<DILocation>(A))
+ continue;
+ return cast<DILocation>(A);
+ }
+ }
+
+ return {};
+}
+
+static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) {
+ // First find dedicated transformation location
+ // (such as the location of #pragma clang loop)
+ MDNode *MD = findOptionMDForLoopID(LoopMD, Name);
+ if (DebugLoc K = findFirstDebugLoc(MD))
+ return K;
+
+ // Otherwise, fall back to the location of the loop itself
+ return findFirstDebugLoc(LoopMD);
+}
+
/// Apply full or partial unrolling.
static isl::schedule applyLoopUnroll(MDNode *LoopMD,
isl::schedule_node BandToUnroll) {
@@ -78,6 +112,15 @@ static isl::schedule applyLoopUnroll(MDNode *LoopMD,
return {};
}
+static isl::schedule applyLoopFission(MDNode *LoopMD,
+ isl::schedule_node BandToFission) {
+ // TODO: Make it possible to selectively fission substatements.
+ // TODO: Apply followup loop properties.
+ // TODO: Instead of fissioning every statement, find the maximum set that
+ // does not cause a dependency violation.
+ return applyMaxFission(BandToFission);
+}
+
// Return the properties from a LoopID. Scalar properties are ignored.
static auto getLoopMDProps(MDNode *LoopMD) {
return map_range(
@@ -96,14 +139,76 @@ class SearchTransformVisitor
BaseTy &getBase() { return *this; }
const BaseTy &getBase() const { return *this; }
+ polly::Scop *S;
+ const Dependences *D;
+ OptimizationRemarkEmitter *ORE;
+
// Set after a transformation is applied. Recursive search must be aborted
// once this happens to ensure that any new followup transformation is
// transformed in innermost-first order.
isl::schedule Result;
+ /// Check whether a schedule after a transformation is legal. If it is not,
+ /// return the old schedule without the transformation.
+ isl::schedule
+ checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion,
+ const isl::schedule_node &OrigBand,
+ StringRef DebugLocAttr, StringRef TransPrefix,
+ StringRef RemarkName, StringRef TransformationName) {
+ if (D->isValidSchedule(*S, Result))
+ return Result;
+
+ LLVMContext &Ctx = LoopMD->getContext();
+ LLVM_DEBUG(dbgs() << "Dependency violation detected\n");
+
+ DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr);
+
+ if (IgnoreDepcheck) {
+ LLVM_DEBUG(dbgs() << "Still accepting transformation due to "
+ "-polly-pragma-ignore-depcheck\n");
+ if (ORE) {
+ ORE->emit(
+ OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion)
+ << (Twine("Could not verify dependencies for ") +
+ TransformationName +
+ "; still applying because of -polly-pragma-ignore-depcheck")
+ .str());
+ }
+ return Result;
+ }
+
+ LLVM_DEBUG(dbgs() << "Rolling back transformation\n");
+
+ if (ORE) {
+ ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName,
+ TransformLoc, CodeRegion)
+ << (Twine("not applying ") + TransformationName +
+ ": cannot ensure semantic equivalence due to possible "
+ "dependency violations")
+ .str());
+ }
+
+ // If illegal, revert and remove the transformation to not risk re-trying
+ // indefinitely.
+ MDNode *NewLoopMD =
+ makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {});
+ BandAttr *Attr = getBandAttr(OrigBand);
+ Attr->Metadata = NewLoopMD;
+
+ // Roll back old schedule.
+ return OrigBand.get_schedule();
+ }
+
public:
- static isl::schedule applyOneTransformation(const isl::schedule &Sched) {
- SearchTransformVisitor Transformer;
+ SearchTransformVisitor(polly::Scop *S, const Dependences *D,
+ OptimizationRemarkEmitter *ORE)
+ : S(S), D(D), ORE(ORE) {}
+
+ static isl::schedule applyOneTransformation(polly::Scop *S,
+ const Dependences *D,
+ OptimizationRemarkEmitter *ORE,
+ const isl::schedule &Sched) {
+ SearchTransformVisitor Transformer(S, D, ORE);
Transformer.visit(Sched);
return Transformer.Result;
}
@@ -125,6 +230,14 @@ class SearchTransformVisitor
return;
}
+ // CodeRegion is used by ORE to determine code hotness.
+ // TODO: Works only for original loop; for transformed loops, should track
+ // where the loop's body code comes from.
+ Loop *Loop = Attr->OriginalLoop;
+ Value *CodeRegion = nullptr;
+ if (Loop)
+ CodeRegion = Loop->getHeader();
+
MDNode *LoopMD = Attr->Metadata;
if (!LoopMD)
return;
@@ -146,6 +259,15 @@ class SearchTransformVisitor
Result = applyLoopUnroll(LoopMD, Band);
if (!Result.is_null())
return;
+ } else if (AttrName == "llvm.loop.distribute.enable") {
+ Result = applyLoopFission(LoopMD, Band);
+ if (!Result.is_null())
+ Result = checkDependencyViolation(
+ LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc",
+ "llvm.loop.distribute.", "FailedRequestedFission",
+ "loop fission/distribution");
+ if (!Result.is_null())
+ return;
}
// not a loop transformation; look for next property
@@ -162,11 +284,14 @@ class SearchTransformVisitor
} // namespace
-isl::schedule polly::applyManualTransformations(Scop *S, isl::schedule Sched) {
+isl::schedule
+polly::applyManualTransformations(Scop *S, isl::schedule Sched,
+ const Dependences &D,
+ OptimizationRemarkEmitter *ORE) {
// Search the loop nest for transformations until fixpoint.
while (true) {
isl::schedule Result =
- SearchTransformVisitor::applyOneTransformation(Sched);
+ SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched);
if (Result.is_null()) {
// No (more) transformation has been found.
break;
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 355a90f65f8a2..4c47b8fce76a9 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -55,6 +55,7 @@
#include "polly/Support/ISLOStream.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "isl/options.h"
@@ -668,7 +669,9 @@ static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
static bool runIslScheduleOptimizer(
Scop &S,
function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
- TargetTransformInfo *TTI, isl::schedule &LastSchedule) {
+ TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
+ isl::schedule &LastSchedule) {
+
// Skip SCoPs in case they're already optimised by PPCGCodeGeneration
if (S.isToBeSkipped())
return false;
@@ -689,8 +692,8 @@ static bool runIslScheduleOptimizer(
bool HasUserTransformation = false;
if (PragmaBasedOpts) {
- isl::schedule ManuallyTransformed =
- applyManualTransformations(&S, Schedule);
+ isl::schedule ManuallyTransformed = applyManualTransformations(
+ &S, Schedule, GetDeps(Dependences::AL_Statement), ORE);
if (ManuallyTransformed.is_null()) {
LLVM_DEBUG(dbgs() << "Error during manual optimization\n");
return false;
@@ -849,7 +852,9 @@ static bool runIslScheduleOptimizer(
walkScheduleTreeForStatistics(Schedule, 2);
}
- if (!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
+ // Skip profitability check if user transformation(s) have been applied.
+ if (!HasUserTransformation &&
+ !ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
return false;
auto ScopStats = S.getStatistics();
@@ -878,9 +883,11 @@ bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
return getAnalysis<DependenceInfo>().getDependences(
Dependences::AL_Statement);
};
+ OptimizationRemarkEmitter &ORE =
+ getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return runIslScheduleOptimizer(S, getDependences, TTI, LastSchedule);
+ return runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule);
}
static void runScheduleOptimizerPrinter(raw_ostream &OS,
@@ -915,8 +922,10 @@ void IslScheduleOptimizerWrapperPass::getAnalysisUsage(
ScopPass::getAnalysisUsage(AU);
AU.addRequired<DependenceInfo>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<DependenceInfo>();
+ AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
}
} // namespace
@@ -930,6 +939,7 @@ INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
"Polly - Optimize schedule of SCoP", false, false)
@@ -941,9 +951,10 @@ runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM,
auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & {
return Deps.getDependences(Dependences::AL_Statement);
};
+ OptimizationRemarkEmitter ORE(&S.getFunction());
TargetTransformInfo *TTI = &SAR.TTI;
isl::schedule LastSchedule;
- bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, LastSchedule);
+ bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule);
if (OS) {
*OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
<< S.getName() << "' in function '" << S.getFunction().getName()
diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp
index f8ca449744740..ce4e6ae9614db 100644
--- a/polly/lib/Transform/ScheduleTreeTransform.cpp
+++ b/polly/lib/Transform/ScheduleTreeTransform.cpp
@@ -384,19 +384,22 @@ static bool isMark(const isl::schedule_node &Node) {
return isl_schedule_node_get_type(Node.get()) == isl_schedule_node_mark;
}
-#ifndef NDEBUG
/// Is this node of type band?
static bool isBand(const isl::schedule_node &Node) {
return isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band;
}
+#ifndef NDEBUG
/// Is this node a band of a single dimension (i.e. could represent a loop)?
static bool isBandWithSingleLoop(const isl::schedule_node &Node) {
-
return isBand(Node) && isl_schedule_node_band_n_member(Node.get()) == 1;
}
#endif
+static bool isLeaf(const isl::schedule_node &Node) {
+ return isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf;
+}
+
/// Create an isl::id representing the output loop after a transformation.
static isl::id createGeneratedLoopAttr(isl::ctx Ctx, MDNode *FollowupLoopMD) {
// Don't need to id the followup.
@@ -728,3 +731,46 @@ isl::schedule_node polly::applyRegisterTiling(isl::schedule_node Node,
return Node.as<isl::schedule_node_band>().set_ast_build_options(
isl::union_set(Ctx, "{unroll[x]}"));
}
+
+/// Find statements and sub-loops in (possibly nested) sequences.
+static void
+collectFussionableStmts(isl::schedule_node Node,
+ SmallVectorImpl<isl::schedule_node> &ScheduleStmts) {
+ if (isBand(Node) || isLeaf(Node)) {
+ ScheduleStmts.push_back(Node);
+ return;
+ }
+
+ if (Node.has_children()) {
+ isl::schedule_node C = Node.first_child();
+ while (true) {
+ collectFussionableStmts(C, ScheduleStmts);
+ if (!C.has_next_sibling())
+ break;
+ C = C.next_sibling();
+ }
+ }
+}
+
+isl::schedule polly::applyMaxFission(isl::schedule_node BandToFission) {
+ isl::ctx Ctx = BandToFission.ctx();
+ BandToFission = removeMark(BandToFission);
+ isl::schedule_node BandBody = BandToFission.child(0);
+
+ SmallVector<isl::schedule_node> FissionableStmts;
+ collectFussionableStmts(BandBody, FissionableStmts);
+ size_t N = FissionableStmts.size();
+
+ // Collect the domain for each of the statements that will get their own loop.
+ isl::union_set_list DomList = isl::union_set_list(Ctx, N);
+ for (size_t i = 0; i < N; ++i) {
+ isl::schedule_node BodyPart = FissionableStmts[i];
+ DomList = DomList.add(BodyPart.get_domain());
+ }
+
+ // Apply the fission by copying the entire loop, but inserting a filter for
+ // the statement domains for each fissioned loop.
+ isl::schedule_node Fissioned = BandToFission.insert_sequence(DomList);
+
+ return Fissioned.get_schedule();
+}
diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
new file mode 100644
index 0000000000000..f8c311be07c75
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
@@ -0,0 +1,51 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=ON
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=0 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=OFF
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+ br label %for
+
+for:
+ %j = phi i32 [0, %entry], [%j.inc, %inc]
+ %j.cmp = icmp slt i32 %j, %n
+ br i1 %j.cmp, label %body, label %exit
+
+ body:
+ store double 42.0, double* %A
+ %c = fadd double 21.0, 21.0
+ store double %c, double* %B
+ br label %inc
+
+inc:
+ %j.inc = add nuw nsw i32 %j, 1
+ br label %for, !llvm.loop !2
+
+exit:
+ br label %return
+
+return:
+ ret void
+}
+
+
+!2 = distinct !{!2, !5}
+!5 = !{!"llvm.loop.distribute.enable"}
+
+
+; ON: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func':
+; ON: Calculated schedule:
+; ON-NEXT: domain: "[n] -> { Stmt_body[i0] : 0 <= i0 < n; Stmt_body_b[i0] : 0 <= i0 < n }"
+; ON-NEXT: child:
+; ON-NEXT: sequence:
+; ON-NEXT: - filter: "[n] -> { Stmt_body[i0] : 0 <= i0 < n }"
+; ON-NEXT: child:
+; ON-NEXT: schedule: "[n] -> [{ Stmt_body[i0] -> [(i0)] }]"
+; ON-NEXT: - filter: "[n] -> { Stmt_body_b[i0] : 0 <= i0 < n }"
+; ON-NEXT: child:
+; ON-NEXT: schedule: "[n] -> [{ Stmt_body_b[i0] -> [(i0)] }]"
+
+
+; OFF-LABEL: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func':
+; OFF-NEXT: Calculated schedule:
+; OFF-NEXT: n/a
+
diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
new file mode 100644
index 0000000000000..f464f5a367e06
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
@@ -0,0 +1,109 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2>&1 | FileCheck %s --match-full-lines
+;
+; CHECK: warning: distribute_illegal.c:2:3: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations
+;
+; void foo(double *A,double *B) {
+; for (int i = 1; i < 128; ++i) {
+; A[i] = i;
+; B[i] = A[i+1];
+; }
+; }
+
+source_filename = "distribute_illegal.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 {
+entry:
+ call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19
+ br label %for.cond, !dbg !20
+
+for.cond:
+ %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19
+ call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19
+ %cmp = icmp slt i32 %i.0, 128, !dbg !21
+ br i1 %cmp, label %for.body, label %for.end, !dbg !23
+
+for.body:
+ %conv = sitofp i32 %i.0 to double, !dbg !24
+ %idxprom = sext i32 %i.0 to i64, !dbg !26
+ %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26
+ store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28
+
+ %add = add nsw i32 %i.0, 1, !dbg !32
+ %idxprom1 = sext i32 %add to i64, !dbg !33
+ %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33
+ %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28
+ %idxprom3 = sext i32 %i.0 to i64, !dbg !34
+ %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34
+ store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28
+
+ %inc = add nsw i32 %i.0, 1, !dbg !36
+ call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19
+ br label %for.cond, !dbg !37, !llvm.loop !38
+
+for.end:
+ ret void, !dbg !41
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 1}
+!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
+!12 = !{!13, !14, !15}
+!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17)
+!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3)
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 0, scope: !16)
+!20 = !DILocation(line: 2, column: 8, scope: !16)
+!21 = !DILocation(line: 2, column: 21, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!23 = !DILocation(line: 2, column: 3, scope: !16)
+!24 = !DILocation(line: 3, column: 12, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33)
+!26 = !DILocation(line: 3, column: 5, scope: !25)
+!27 = !DILocation(line: 3, column: 10, scope: !25)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"double", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !DILocation(line: 4, column: 15, scope: !25)
+!33 = !DILocation(line: 4, column: 12, scope: !25)
+!34 = !DILocation(line: 4, column: 5, scope: !25)
+!35 = !DILocation(line: 4, column: 10, scope: !25)
+!36 = !DILocation(line: 2, column: 28, scope: !22)
+!37 = !DILocation(line: 2, column: 3, scope: !22)
+!38 = distinct !{!38, !23, !39, !40, !100}
+!39 = !DILocation(line: 5, column: 3, scope: !16)
+!40 = !{!"llvm.loop.mustprogress"}
+!41 = !DILocation(line: 6, column: 1, scope: !7)
+!100 = !{!"llvm.loop.distribute.enable"}
diff --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll
new file mode 100644
index 0000000000000..866b81420dc1b
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll
@@ -0,0 +1,111 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2<&1 | FileCheck %s --match-full-lines
+;
+; CHECK: warning: distribute_illegal.c:1:42: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations
+;
+; void foo(double *A,double *B) {
+; for (int i = 1; i < 128; ++i) {
+; A[i] = i;
+; B[i] = A[i+1];
+; }
+; }
+
+source_filename = "distribute_illegal.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 {
+entry:
+ call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19
+ br label %for.cond, !dbg !20
+
+for.cond:
+ %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19
+ call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19
+ %cmp = icmp slt i32 %i.0, 128, !dbg !21
+ br i1 %cmp, label %for.body, label %for.end, !dbg !23
+
+for.body:
+ %conv = sitofp i32 %i.0 to double, !dbg !24
+ %idxprom = sext i32 %i.0 to i64, !dbg !26
+ %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26
+ store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28
+
+ %add = add nsw i32 %i.0, 1, !dbg !32
+ %idxprom1 = sext i32 %add to i64, !dbg !33
+ %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33
+ %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28
+ %idxprom3 = sext i32 %i.0 to i64, !dbg !34
+ %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34
+ store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28
+
+ %inc = add nsw i32 %i.0, 1, !dbg !36
+ call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19
+ br label %for.cond, !dbg !37, !llvm.loop !38
+
+for.end:
+ ret void, !dbg !41
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 1}
+!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
+!12 = !{!13, !14, !15}
+!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17)
+!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3)
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 0, scope: !16)
+!20 = !DILocation(line: 2, column: 8, scope: !16)
+!21 = !DILocation(line: 2, column: 21, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!23 = !DILocation(line: 2, column: 3, scope: !16)
+!24 = !DILocation(line: 3, column: 12, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33)
+!26 = !DILocation(line: 3, column: 5, scope: !25)
+!27 = !DILocation(line: 3, column: 10, scope: !25)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"double", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !DILocation(line: 4, column: 15, scope: !25)
+!33 = !DILocation(line: 4, column: 12, scope: !25)
+!34 = !DILocation(line: 4, column: 5, scope: !25)
+!35 = !DILocation(line: 4, column: 10, scope: !25)
+!36 = !DILocation(line: 2, column: 28, scope: !22)
+!37 = !DILocation(line: 2, column: 3, scope: !22)
+!38 = distinct !{!38, !23, !39, !40, !100, !101}
+!39 = !DILocation(line: 5, column: 3, scope: !16)
+!40 = !{!"llvm.loop.mustprogress"}
+!41 = !DILocation(line: 6, column: 1, scope: !7)
+!100 = !{!"llvm.loop.distribute.enable"}
+!101 = !{!"llvm.loop.distribute.loc", !102}
+!102 = !DILocation(line: 1, column: 42, scope: !16)
More information about the llvm-commits
mailing list