[polly] 52c30ad - [Polly] Implement user-directed loop distribution/fission.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 22 15:40:02 PDT 2021


Author: Michael Kruse
Date: 2021-09-22T17:28:25-05:00
New Revision: 52c30adc7dfe6334b71adf256d81f70e7b976143

URL: https://github.com/llvm/llvm-project/commit/52c30adc7dfe6334b71adf256d81f70e7b976143
DIFF: https://github.com/llvm/llvm-project/commit/52c30adc7dfe6334b71adf256d81f70e7b976143.diff

LOG: [Polly] Implement user-directed loop distribution/fission.

This is a simple version without the possibility to define distribution
points or follow-up transformations. However, it is the first
transformation that has to check whether applying it is legal.

It interprets the same metadata as the LoopDistribute pass.

Added: 
    polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
    polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
    polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll

Modified: 
    polly/include/polly/DependenceInfo.h
    polly/include/polly/ManualOptimizer.h
    polly/include/polly/ScheduleTreeTransform.h
    polly/lib/Analysis/DependenceInfo.cpp
    polly/lib/Transform/ManualOptimizer.cpp
    polly/lib/Transform/ScheduleOptimizer.cpp
    polly/lib/Transform/ScheduleTreeTransform.cpp

Removed: 
    


################################################################################
diff  --git a/polly/include/polly/DependenceInfo.h b/polly/include/polly/DependenceInfo.h
index a8b11191d619c..3d70ea2b74e54 100644
--- a/polly/include/polly/DependenceInfo.h
+++ b/polly/include/polly/DependenceInfo.h
@@ -124,6 +124,10 @@ struct Dependences {
   ///         dependences.
   bool isValidSchedule(Scop &S, const StatementToIslMapTy &NewSchedules) const;
 
+  /// Return true if the schedule @p NewSched is a schedule for @p S that does
+  /// not violate any dependences.
+  bool isValidSchedule(Scop &S, isl::schedule NewSched) const;
+
   /// Print the stored dependence information.
   void print(llvm::raw_ostream &OS) const;
 

diff  --git a/polly/include/polly/ManualOptimizer.h b/polly/include/polly/ManualOptimizer.h
index 066eb4d84c511..988926334eb1a 100644
--- a/polly/include/polly/ManualOptimizer.h
+++ b/polly/include/polly/ManualOptimizer.h
@@ -15,8 +15,13 @@
 
 #include "isl/isl-noexceptions.h"
 
+namespace llvm {
+class OptimizationRemarkEmitter;
+}
+
 namespace polly {
 class Scop;
+struct Dependences;
 
 /// Apply loop-transformation metadata.
 ///
@@ -30,7 +35,9 @@ class Scop;
 /// @return The transformed schedule with all mark-nodes with loop
 ///         transformations applied. Returns NULL in case of an error or @p
 ///         Sched itself if no transformation has been applied.
-isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched);
+isl::schedule applyManualTransformations(Scop *S, isl::schedule Sched,
+                                         const Dependences &D,
+                                         llvm::OptimizationRemarkEmitter *ORE);
 } // namespace polly
 
 #endif /* POLLY_MANUALOPTIMIZER_H */

diff  --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h
index 5fd0d6ad4dd0c..e8685313c83c4 100644
--- a/polly/include/polly/ScheduleTreeTransform.h
+++ b/polly/include/polly/ScheduleTreeTransform.h
@@ -178,6 +178,9 @@ isl::schedule applyFullUnroll(isl::schedule_node BandToUnroll);
 /// Replace the AST band @p BandToUnroll by a partially unrolled equivalent.
 isl::schedule applyPartialUnroll(isl::schedule_node BandToUnroll, int Factor);
 
+/// Loop-distribute the band @p BandToFission as much as possible.
+isl::schedule applyMaxFission(isl::schedule_node BandToFission);
+
 /// Build the desired set of partial tile prefixes.
 ///
 /// We build a set of partial tile prefixes, which are prefixes of the vector

diff  --git a/polly/lib/Analysis/DependenceInfo.cpp b/polly/lib/Analysis/DependenceInfo.cpp
index 709bce7ea3b60..0ac7ff1a14c0b 100644
--- a/polly/lib/Analysis/DependenceInfo.cpp
+++ b/polly/lib/Analysis/DependenceInfo.cpp
@@ -636,6 +636,19 @@ void Dependences::calculateDependences(Scop &S) {
   LLVM_DEBUG(dump());
 }
 
+bool Dependences::isValidSchedule(Scop &S, isl::schedule NewSched) const {
+  // TODO: Check permutable/coincident flags as well.
+
+  StatementToIslMapTy NewSchedules;
+  for (auto NewMap : NewSched.get_map().get_map_list()) {
+    auto Stmt = reinterpret_cast<ScopStmt *>(
+        NewMap.get_tuple_id(isl::dim::in).get_user());
+    NewSchedules[Stmt] = NewMap;
+  }
+
+  return isValidSchedule(S, NewSchedules);
+}
+
 bool Dependences::isValidSchedule(
     Scop &S, const StatementToIslMapTy &NewSchedule) const {
   if (LegalityCheckDisabled)

diff  --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp
index 2a77f7d49ae22..2c05927582e28 100644
--- a/polly/lib/Transform/ManualOptimizer.cpp
+++ b/polly/lib/Transform/ManualOptimizer.cpp
@@ -11,11 +11,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "polly/ManualOptimizer.h"
+#include "polly/DependenceInfo.h"
+#include "polly/Options.h"
 #include "polly/ScheduleTreeTransform.h"
 #include "polly/Support/ScopHelper.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
@@ -25,6 +28,12 @@ using namespace polly;
 using namespace llvm;
 
 namespace {
+
+static cl::opt<bool> IgnoreDepcheck(
+    "polly-pragma-ignore-depcheck",
+    cl::desc("Skip the dependency check for pragma-based transformations"),
+    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 /// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument
 /// instead of a Loop.
 static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
@@ -48,6 +57,31 @@ static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
   return TM_Unspecified;
 }
 
+// Return the first DebugLoc in the list.
+static DebugLoc findFirstDebugLoc(MDNode *MD) {
+  if (MD) {
+    for (const MDOperand &X : drop_begin(MD->operands(), 1)) {
+      Metadata *A = X.get();
+      if (!isa<DILocation>(A))
+        continue;
+      return cast<DILocation>(A);
+    }
+  }
+
+  return {};
+}
+
+static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) {
+  // First find dedicated transformation location
+  // (such as the location of #pragma clang loop)
+  MDNode *MD = findOptionMDForLoopID(LoopMD, Name);
+  if (DebugLoc K = findFirstDebugLoc(MD))
+    return K;
+
+  // Otherwise, fall back to the location of the loop itself
+  return findFirstDebugLoc(LoopMD);
+}
+
 /// Apply full or partial unrolling.
 static isl::schedule applyLoopUnroll(MDNode *LoopMD,
                                      isl::schedule_node BandToUnroll) {
@@ -78,6 +112,15 @@ static isl::schedule applyLoopUnroll(MDNode *LoopMD,
   return {};
 }
 
+static isl::schedule applyLoopFission(MDNode *LoopMD,
+                                      isl::schedule_node BandToFission) {
+  // TODO: Make it possible to selectively fission substatements.
+  // TODO: Apply followup loop properties.
+  // TODO: Instead of fissioning every statement, find the maximum set that
+  // does not cause a dependency violation.
+  return applyMaxFission(BandToFission);
+}
+
 // Return the properties from a LoopID. Scalar properties are ignored.
 static auto getLoopMDProps(MDNode *LoopMD) {
   return map_range(
@@ -96,14 +139,76 @@ class SearchTransformVisitor
   BaseTy &getBase() { return *this; }
   const BaseTy &getBase() const { return *this; }
 
+  polly::Scop *S;
+  const Dependences *D;
+  OptimizationRemarkEmitter *ORE;
+
   // Set after a transformation is applied. Recursive search must be aborted
   // once this happens to ensure that any new followup transformation is
   // transformed in innermost-first order.
   isl::schedule Result;
 
+  /// Check whether the schedule after a transformation is legal. If it is not,
+  /// return the old schedule, unless -polly-pragma-ignore-depcheck is set.
+  isl::schedule
+  checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion,
+                           const isl::schedule_node &OrigBand,
+                           StringRef DebugLocAttr, StringRef TransPrefix,
+                           StringRef RemarkName, StringRef TransformationName) {
+    if (D->isValidSchedule(*S, Result))
+      return Result;
+
+    LLVMContext &Ctx = LoopMD->getContext();
+    LLVM_DEBUG(dbgs() << "Dependency violation detected\n");
+
+    DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr);
+
+    if (IgnoreDepcheck) {
+      LLVM_DEBUG(dbgs() << "Still accepting transformation due to "
+                           "-polly-pragma-ignore-depcheck\n");
+      if (ORE) {
+        ORE->emit(
+            OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion)
+            << (Twine("Could not verify dependencies for ") +
+                TransformationName +
+                "; still applying because of -polly-pragma-ignore-depcheck")
+                   .str());
+      }
+      return Result;
+    }
+
+    LLVM_DEBUG(dbgs() << "Rolling back transformation\n");
+
+    if (ORE) {
+      ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName,
+                                                  TransformLoc, CodeRegion)
+                << (Twine("not applying ") + TransformationName +
+                    ": cannot ensure semantic equivalence due to possible "
+                    "dependency violations")
+                       .str());
+    }
+
+    // If illegal, revert and remove the transformation to not risk re-trying
+    // indefinitely.
+    MDNode *NewLoopMD =
+        makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {});
+    BandAttr *Attr = getBandAttr(OrigBand);
+    Attr->Metadata = NewLoopMD;
+
+    // Roll back old schedule.
+    return OrigBand.get_schedule();
+  }
+
 public:
-  static isl::schedule applyOneTransformation(const isl::schedule &Sched) {
-    SearchTransformVisitor Transformer;
+  SearchTransformVisitor(polly::Scop *S, const Dependences *D,
+                         OptimizationRemarkEmitter *ORE)
+      : S(S), D(D), ORE(ORE) {}
+
+  static isl::schedule applyOneTransformation(polly::Scop *S,
+                                              const Dependences *D,
+                                              OptimizationRemarkEmitter *ORE,
+                                              const isl::schedule &Sched) {
+    SearchTransformVisitor Transformer(S, D, ORE);
     Transformer.visit(Sched);
     return Transformer.Result;
   }
@@ -125,6 +230,14 @@ class SearchTransformVisitor
       return;
     }
 
+    // CodeRegion is used by ORE to determine code hotness.
+    // TODO: Works only for original loop; for transformed loops, should track
+    // where the loop's body code comes from.
+    Loop *Loop = Attr->OriginalLoop;
+    Value *CodeRegion = nullptr;
+    if (Loop)
+      CodeRegion = Loop->getHeader();
+
     MDNode *LoopMD = Attr->Metadata;
     if (!LoopMD)
       return;
@@ -146,6 +259,15 @@ class SearchTransformVisitor
         Result = applyLoopUnroll(LoopMD, Band);
         if (!Result.is_null())
           return;
+      } else if (AttrName == "llvm.loop.distribute.enable") {
+        Result = applyLoopFission(LoopMD, Band);
+        if (!Result.is_null())
+          Result = checkDependencyViolation(
+              LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc",
+              "llvm.loop.distribute.", "FailedRequestedFission",
+              "loop fission/distribution");
+        if (!Result.is_null())
+          return;
       }
 
       // not a loop transformation; look for next property
@@ -162,11 +284,14 @@ class SearchTransformVisitor
 
 } // namespace
 
-isl::schedule polly::applyManualTransformations(Scop *S, isl::schedule Sched) {
+isl::schedule
+polly::applyManualTransformations(Scop *S, isl::schedule Sched,
+                                  const Dependences &D,
+                                  OptimizationRemarkEmitter *ORE) {
   // Search the loop nest for transformations until fixpoint.
   while (true) {
     isl::schedule Result =
-        SearchTransformVisitor::applyOneTransformation(Sched);
+        SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched);
     if (Result.is_null()) {
       // No (more) transformation has been found.
       break;

diff  --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp
index 6e48ffe811f56..93bf26f4db51c 100644
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -55,6 +55,7 @@
 #include "polly/Support/ISLOStream.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "isl/options.h"
@@ -668,7 +669,9 @@ static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
 static bool runIslScheduleOptimizer(
     Scop &S,
     function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
-    TargetTransformInfo *TTI, isl::schedule &LastSchedule) {
+    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
+    isl::schedule &LastSchedule) {
+
   // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
   if (S.isToBeSkipped())
     return false;
@@ -689,8 +692,8 @@ static bool runIslScheduleOptimizer(
 
   bool HasUserTransformation = false;
   if (PragmaBasedOpts) {
-    isl::schedule ManuallyTransformed =
-        applyManualTransformations(&S, Schedule);
+    isl::schedule ManuallyTransformed = applyManualTransformations(
+        &S, Schedule, GetDeps(Dependences::AL_Statement), ORE);
     if (ManuallyTransformed.is_null()) {
       LLVM_DEBUG(dbgs() << "Error during manual optimization\n");
       return false;
@@ -864,7 +867,9 @@ static bool runIslScheduleOptimizer(
     walkScheduleTreeForStatistics(Schedule, 2);
   }
 
-  if (!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
+  // Skip profitability check if user transformation(s) have been applied.
+  if (!HasUserTransformation &&
+      !ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
     return false;
 
   auto ScopStats = S.getStatistics();
@@ -893,9 +898,11 @@ bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
     return getAnalysis<DependenceInfo>().getDependences(
         Dependences::AL_Statement);
   };
+  OptimizationRemarkEmitter &ORE =
+      getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   TargetTransformInfo *TTI =
       &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  return runIslScheduleOptimizer(S, getDependences, TTI, LastSchedule);
+  return runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule);
 }
 
 static void runScheduleOptimizerPrinter(raw_ostream &OS,
@@ -930,8 +937,10 @@ void IslScheduleOptimizerWrapperPass::getAnalysisUsage(
   ScopPass::getAnalysisUsage(AU);
   AU.addRequired<DependenceInfo>();
   AU.addRequired<TargetTransformInfoWrapperPass>();
+  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
 
   AU.addPreserved<DependenceInfo>();
+  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
 }
 
 } // namespace
@@ -945,6 +954,7 @@ INITIALIZE_PASS_BEGIN(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
 INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
 INITIALIZE_PASS_DEPENDENCY(ScopInfoRegionPass);
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass);
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
 INITIALIZE_PASS_END(IslScheduleOptimizerWrapperPass, "polly-opt-isl",
                     "Polly - Optimize schedule of SCoP", false, false)
 
@@ -956,9 +966,10 @@ runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM,
   auto GetDeps = [&Deps](Dependences::AnalysisLevel) -> const Dependences & {
     return Deps.getDependences(Dependences::AL_Statement);
   };
+  OptimizationRemarkEmitter ORE(&S.getFunction());
   TargetTransformInfo *TTI = &SAR.TTI;
   isl::schedule LastSchedule;
-  bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, LastSchedule);
+  bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule);
   if (OS) {
     *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
         << S.getName() << "' in function '" << S.getFunction().getName()

diff  --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp
index f8ca449744740..f23d696bff1e8 100644
--- a/polly/lib/Transform/ScheduleTreeTransform.cpp
+++ b/polly/lib/Transform/ScheduleTreeTransform.cpp
@@ -397,6 +397,10 @@ static bool isBandWithSingleLoop(const isl::schedule_node &Node) {
 }
 #endif
 
+static bool isLeaf(const isl::schedule_node &Node) {
+  return isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf;
+}
+
 /// Create an isl::id representing the output loop after a transformation.
 static isl::id createGeneratedLoopAttr(isl::ctx Ctx, MDNode *FollowupLoopMD) {
   // Don't need to id the followup.
@@ -728,3 +732,46 @@ isl::schedule_node polly::applyRegisterTiling(isl::schedule_node Node,
   return Node.as<isl::schedule_node_band>().set_ast_build_options(
       isl::union_set(Ctx, "{unroll[x]}"));
 }
+
+/// Find statements and sub-loops in (possibly nested) sequences.
+static void
+collectFussionableStmts(isl::schedule_node Node,
+                        SmallVectorImpl<isl::schedule_node> &ScheduleStmts) {
+  if (isBand(Node) || isLeaf(Node)) {
+    ScheduleStmts.push_back(Node);
+    return;
+  }
+
+  if (Node.has_children()) {
+    isl::schedule_node C = Node.first_child();
+    while (true) {
+      collectFussionableStmts(C, ScheduleStmts);
+      if (!C.has_next_sibling())
+        break;
+      C = C.next_sibling();
+    }
+  }
+}
+
+isl::schedule polly::applyMaxFission(isl::schedule_node BandToFission) {
+  isl::ctx Ctx = BandToFission.ctx();
+  BandToFission = removeMark(BandToFission);
+  isl::schedule_node BandBody = BandToFission.child(0);
+
+  SmallVector<isl::schedule_node> FissionableStmts;
+  collectFussionableStmts(BandBody, FissionableStmts);
+  size_t N = FissionableStmts.size();
+
+  // Collect the domain for each of the statements that will get their own loop.
+  isl::union_set_list DomList = isl::union_set_list(Ctx, N);
+  for (size_t i = 0; i < N; ++i) {
+    isl::schedule_node BodyPart = FissionableStmts[i];
+    DomList = DomList.add(BodyPart.get_domain());
+  }
+
+  // Apply the fission by copying the entire loop, but inserting a filter for
+  // the statement domains for each fissioned loop.
+  isl::schedule_node Fissioned = BandToFission.insert_sequence(DomList);
+
+  return Fissioned.get_schedule();
+}

diff  --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
new file mode 100644
index 0000000000000..f8c311be07c75
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_heuristic.ll
@@ -0,0 +1,51 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=ON
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=0 -analyze < %s | FileCheck %s --match-full-lines --check-prefix=OFF
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %body, label %exit
+
+    body:
+      store double 42.0, double* %A
+      %c = fadd double 21.0, 21.0
+      store double %c, double* %B
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for, !llvm.loop !2
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+!2 = distinct !{!2, !5}
+!5 = !{!"llvm.loop.distribute.enable"}
+
+
+; ON: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func':
+; ON:      Calculated schedule:
+; ON-NEXT: domain: "[n] -> { Stmt_body[i0] : 0 <= i0 < n; Stmt_body_b[i0] : 0 <= i0 < n }"
+; ON-NEXT: child:
+; ON-NEXT:   sequence:
+; ON-NEXT:   - filter: "[n] -> { Stmt_body[i0] : 0 <= i0 < n }"
+; ON-NEXT:     child:
+; ON-NEXT:       schedule: "[n] -> [{ Stmt_body[i0] -> [(i0)] }]"
+; ON-NEXT:   - filter: "[n] -> { Stmt_body_b[i0] : 0 <= i0 < n }"
+; ON-NEXT:     child:
+; ON-NEXT:       schedule: "[n] -> [{ Stmt_body_b[i0] -> [(i0)] }]"
+
+
+; OFF-LABEL: Printing analysis 'Polly - Optimize schedule of SCoP' for region: 'for => return' in function 'func':
+; OFF-NEXT:  Calculated schedule:
+; OFF-NEXT:    n/a
+

diff  --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
new file mode 100644
index 0000000000000..f464f5a367e06
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_looploc.ll
@@ -0,0 +1,109 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2>&1 | FileCheck %s --match-full-lines
+;
+; CHECK: warning: distribute_illegal.c:2:3: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations
+;
+; void foo(double *A,double *B) {
+;   for (int i = 1; i < 128; ++i) {
+;     A[i] = i;
+;     B[i] = A[i+1];
+;   }
+; }
+
+source_filename = "distribute_illegal.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18
+  call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18
+  call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19
+  br label %for.cond, !dbg !20
+
+for.cond:
+  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19
+  call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19
+  %cmp = icmp slt i32 %i.0, 128, !dbg !21
+  br i1 %cmp, label %for.body, label %for.end, !dbg !23
+
+for.body:
+  %conv = sitofp i32 %i.0 to double, !dbg !24
+  %idxprom = sext i32 %i.0 to i64, !dbg !26
+  %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26
+  store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28
+
+  %add = add nsw i32 %i.0, 1, !dbg !32
+  %idxprom1 = sext i32 %add to i64, !dbg !33
+  %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33
+  %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28
+  %idxprom3 = sext i32 %i.0 to i64, !dbg !34
+  %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34
+  store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28
+
+  %inc = add nsw i32 %i.0, 1, !dbg !36
+  call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19
+  br label %for.cond, !dbg !37, !llvm.loop !38
+
+for.end:
+  ret void, !dbg !41
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 1}
+!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
+!12 = !{!13, !14, !15}
+!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17)
+!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3)
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 0, scope: !16)
+!20 = !DILocation(line: 2, column: 8, scope: !16)
+!21 = !DILocation(line: 2, column: 21, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!23 = !DILocation(line: 2, column: 3, scope: !16)
+!24 = !DILocation(line: 3, column: 12, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33)
+!26 = !DILocation(line: 3, column: 5, scope: !25)
+!27 = !DILocation(line: 3, column: 10, scope: !25)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"double", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !DILocation(line: 4, column: 15, scope: !25)
+!33 = !DILocation(line: 4, column: 12, scope: !25)
+!34 = !DILocation(line: 4, column: 5, scope: !25)
+!35 = !DILocation(line: 4, column: 10, scope: !25)
+!36 = !DILocation(line: 2, column: 28, scope: !22)
+!37 = !DILocation(line: 2, column: 3, scope: !22)
+!38 = distinct !{!38, !23, !39, !40, !100}
+!39 = !DILocation(line: 5, column: 3, scope: !16)
+!40 = !{!"llvm.loop.mustprogress"}
+!41 = !DILocation(line: 6, column: 1, scope: !7)
+!100 = !{!"llvm.loop.distribute.enable"}

diff  --git a/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll
new file mode 100644
index 0000000000000..866b81420dc1b
--- /dev/null
+++ b/polly/test/ScheduleOptimizer/ManualOptimization/distribute_illegal_pragmaloc.ll
@@ -0,0 +1,111 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-reschedule=0 -polly-pragma-based-opts=1 -disable-output < %s 2<&1 | FileCheck %s --match-full-lines
+;
+; CHECK: warning: distribute_illegal.c:1:42: not applying loop fission/distribution: cannot ensure semantic equivalence due to possible dependency violations
+;
+; void foo(double *A,double *B) {
+;   for (int i = 1; i < 128; ++i) {
+;     A[i] = i;
+;     B[i] = A[i+1];
+;   }
+; }
+
+source_filename = "distribute_illegal.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo(double* %A, double* %B) #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata double* %A, metadata !13, metadata !DIExpression()), !dbg !18
+  call void @llvm.dbg.value(metadata double* %B, metadata !14, metadata !DIExpression()), !dbg !18
+  call void @llvm.dbg.value(metadata i32 1, metadata !15, metadata !DIExpression()), !dbg !19
+  br label %for.cond, !dbg !20
+
+for.cond:
+  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.body ], !dbg !19
+  call void @llvm.dbg.value(metadata i32 %i.0, metadata !15, metadata !DIExpression()), !dbg !19
+  %cmp = icmp slt i32 %i.0, 128, !dbg !21
+  br i1 %cmp, label %for.body, label %for.end, !dbg !23
+
+for.body:
+  %conv = sitofp i32 %i.0 to double, !dbg !24
+  %idxprom = sext i32 %i.0 to i64, !dbg !26
+  %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom, !dbg !26
+  store double %conv, double* %arrayidx, align 8, !dbg !27, !tbaa !28
+
+  %add = add nsw i32 %i.0, 1, !dbg !32
+  %idxprom1 = sext i32 %add to i64, !dbg !33
+  %arrayidx2 = getelementptr inbounds double, double* %A, i64 %idxprom1, !dbg !33
+  %0 = load double, double* %arrayidx2, align 8, !dbg !33, !tbaa !28
+  %idxprom3 = sext i32 %i.0 to i64, !dbg !34
+  %arrayidx4 = getelementptr inbounds double, double* %B, i64 %idxprom3, !dbg !34
+  store double %0, double* %arrayidx4, align 8, !dbg !35, !tbaa !28
+
+  %inc = add nsw i32 %i.0, 1, !dbg !36
+  call void @llvm.dbg.value(metadata i32 %inc, metadata !15, metadata !DIExpression()), !dbg !19
+  br label %for.cond, !dbg !37, !llvm.loop !38
+
+for.end:
+  ret void, !dbg !41
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly nofree nosync nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "distribute_illegal.c", directory: "/path/to")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 1}
+!6 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 81189783049d2b93f653c121d3731fd1732a3916)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
+!12 = !{!13, !14, !15}
+!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10)
+!14 = !DILocalVariable(name: "B", arg: 2, scope: !7, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !17)
+!16 = distinct !DILexicalBlock(scope: !7, file: !1, line: 2, column: 3)
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !DILocation(line: 0, scope: !7)
+!19 = !DILocation(line: 0, scope: !16)
+!20 = !DILocation(line: 2, column: 8, scope: !16)
+!21 = !DILocation(line: 2, column: 21, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!23 = !DILocation(line: 2, column: 3, scope: !16)
+!24 = !DILocation(line: 3, column: 12, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !22, file: !1, line: 2, column: 33)
+!26 = !DILocation(line: 3, column: 5, scope: !25)
+!27 = !DILocation(line: 3, column: 10, scope: !25)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"double", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !DILocation(line: 4, column: 15, scope: !25)
+!33 = !DILocation(line: 4, column: 12, scope: !25)
+!34 = !DILocation(line: 4, column: 5, scope: !25)
+!35 = !DILocation(line: 4, column: 10, scope: !25)
+!36 = !DILocation(line: 2, column: 28, scope: !22)
+!37 = !DILocation(line: 2, column: 3, scope: !22)
+!38 = distinct !{!38, !23, !39, !40, !100, !101}
+!39 = !DILocation(line: 5, column: 3, scope: !16)
+!40 = !{!"llvm.loop.mustprogress"}
+!41 = !DILocation(line: 6, column: 1, scope: !7)
+!100 = !{!"llvm.loop.distribute.enable"}
+!101 = !{!"llvm.loop.distribute.loc", !102}
+!102 = !DILocation(line: 1, column: 42, scope: !16)


        


More information about the llvm-commits mailing list