[polly] 76672e3 - [Polly] Add vectorize metadata to loops identified as vectorizable by polly (#113994)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 01:05:30 PST 2025


Author: Karthika Devi C
Date: 2025-01-22T14:35:26+05:30
New Revision: 76672e3349bbc7bc58b0ae93d5cc994f3e16971a

URL: https://github.com/llvm/llvm-project/commit/76672e3349bbc7bc58b0ae93d5cc994f3e16971a
DIFF: https://github.com/llvm/llvm-project/commit/76672e3349bbc7bc58b0ae93d5cc994f3e16971a.diff

LOG: [Polly] Add vectorize metadata to loops identified as vectorizable by polly (#113994)

This patch introduces the initial implementation for annotating loops
created by Polly. Polly generates RunTimeChecks (RTCs), which result in
loop versioning. Specifically, the loop created by Polly is executed
when the RTCs pass; otherwise, the original loop is executed.

This patch adds the "llvm.loop.vectorize.enable" metadata, setting it to
true for loops created by Polly. Disabling vectorization for the original
fallback loop is already merged in #119188.

This behavior is controlled by the 'polly-annotate-metadata-vectorize'
flag, and the annotations are applied only when this flag is enabled.
This flag is set to false by default.

NOTE: This commit is the initial patch in an effort to make Polly interact
with the Loop Vectorizer via metadata.

---------

Co-authored-by: Michael Kruse <github at meinersbur.de>

Added: 
    polly/test/CodeGen/Metadata/basic_vec_annotate.ll

Modified: 
    polly/include/polly/CodeGen/IRBuilder.h
    polly/lib/CodeGen/IRBuilder.cpp
    polly/lib/CodeGen/LoopGenerators.cpp

Removed: 
    


################################################################################
diff  --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index ffca887fbc09aa..6641ac9a0c0684 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -58,9 +58,12 @@ class ScopAnnotator {
   /// Annotate the new instruction @p I for all parallel loops.
   void annotate(llvm::Instruction *I);
 
-  /// Annotate the loop latch @p B wrt. @p L.
-  void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
-                         bool IsLoopVectorizerDisabled) const;
+  /// Annotate the loop latch @p B.
+  /// Last argument is optional, if no value is passed, we don't annotate
+  /// any vectorize metadata.
+  void annotateLoopLatch(
+      llvm::BranchInst *B, bool IsParallel,
+      std::optional<bool> EnableVectorizeMetadata = std::nullopt) const;
 
   /// Add alternative alias based pointers
   ///

diff  --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 2285c746912f4e..782b4b77e4ee49 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -128,8 +128,26 @@ void ScopAnnotator::popLoop(bool IsParallel) {
   LoopAttrEnv.pop_back();
 }
 
-void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
-                                      bool IsLoopVectorizerDisabled) const {
+static void addVectorizeMetadata(LLVMContext &Ctx,
+                                 SmallVector<Metadata *, 3> *Args,
+                                 bool EnableLoopVectorizer) {
+  MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+  ConstantInt *Value =
+      ConstantInt::get(Type::getInt1Ty(Ctx), EnableLoopVectorizer);
+  ValueAsMetadata *PropValue = ValueAsMetadata::get(Value);
+  Args->push_back(MDNode::get(Ctx, {PropName, PropValue}));
+}
+
+void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
+                         llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) {
+  MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+  MDNode *AccGroup = ParallelLoops.back();
+  Args->push_back(MDNode::get(Ctx, {PropName, AccGroup}));
+}
+
+void ScopAnnotator::annotateLoopLatch(
+    BranchInst *B, bool IsParallel,
+    std::optional<bool> EnableVectorizeMetadata) const {
   LLVMContext &Ctx = SE->getContext();
   SmallVector<Metadata *, 3> Args;
 
@@ -145,19 +163,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
     if (MData)
       llvm::append_range(Args, drop_begin(MData->operands(), 1));
   }
-
-  if (IsLoopVectorizerDisabled) {
-    MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
-    ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
-    ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
-    Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
-  }
-
-  if (IsParallel) {
-    MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
-    MDNode *AccGroup = ParallelLoops.back();
-    Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
-  }
+  if (IsParallel)
+    addParallelMetadata(Ctx, &Args, ParallelLoops);
+  if (EnableVectorizeMetadata.has_value())
+    addVectorizeMetadata(Ctx, &Args, *EnableVectorizeMetadata);
 
   // No metadata to annotate.
   if (!MData && Args.size() <= 1)

diff  --git a/polly/lib/CodeGen/LoopGenerators.cpp b/polly/lib/CodeGen/LoopGenerators.cpp
index b4f8bb8948c282..5f772170d96282 100644
--- a/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/polly/lib/CodeGen/LoopGenerators.cpp
@@ -35,6 +35,11 @@ static cl::opt<int, true>
                      cl::Hidden, cl::location(polly::PollyNumThreads),
                      cl::init(0), cl::cat(PollyCategory));
 
+cl::opt<bool> PollyVectorizeMetadata(
+    "polly-annotate-metadata-vectorize",
+    cl::desc("Append vectorize enable/disable metadata from polly"),
+    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
     "polly-scheduling",
     cl::desc("Scheduling type of parallel OpenMP for loops"),
@@ -159,8 +164,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
 
   // Create the loop latch and annotate it as such.
   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
-  if (Annotator)
-    Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
+
+  // Don't annotate vectorize metadata when both LoopVectDisabled and
+  // PollyVectorizeMetadata are disabled. Annotate vectorize metadata to false
+  // when LoopVectDisabled is true. Otherwise we annotate the vectorize metadata
+  // to true.
+  if (Annotator) {
+    std::optional<bool> EnableVectorizeMetadata;
+    if (LoopVectDisabled)
+      EnableVectorizeMetadata = false;
+    else if (PollyVectorizeMetadata)
+      EnableVectorizeMetadata = true;
+    Annotator->annotateLoopLatch(B, Parallel, EnableVectorizeMetadata);
+  }
 
   IV->addIncoming(IncrementedIV, HeaderBB);
   if (GuardBB)

diff  --git a/polly/test/CodeGen/Metadata/basic_vec_annotate.ll b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
new file mode 100644
index 00000000000000..ebe91636ea3cc2
--- /dev/null
+++ b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
@@ -0,0 +1,61 @@
+; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
+
+; Basic verification of vectorize metadata getting added when
+; "-polly-annotate-metadata-vectorize" is passed.
+
+; void add(int *A, int *B, int *C,int n) {
+;    for(int i=0; i<n; i++)
+;      C[i] += A[i] + B[i];
+; }
+
+; CHECK: for.body:
+; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
+; CHECK: polly.stmt.for.body:
+; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
+; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i32 0}
+; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
+define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry.split
+  %wide.trip.count = zext nneg i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry.split
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+  %2 = load i32, ptr %arrayidx4, align 4
+  %add5 = add nsw i32 %add, %2
+  store i32 %add5, ptr %arrayidx4, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}


        


More information about the llvm-commits mailing list