[polly] [Polly] Add vectorize metadata to loops identified as vectorizable by polly (PR #113994)
Karthika Devi C via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 28 18:57:24 PDT 2024
https://github.com/kartcq created https://github.com/llvm/llvm-project/pull/113994
This patch introduces the initial implementation for annotating loops created by Polly. Polly generates RunTimeChecks (RTCs), which result in loop versioning. Specifically, the loop created by Polly is executed when the RTCs pass; otherwise, the original loop is executed.
This patch adds the "llvm.loop.vectorize.enable" metadata, setting it to true for loops created by Polly. It also disables vectorization for the original fallback loop.
This behavior is controlled by the 'polly-annotate-metadata-vectorize' flag, and the annotations are applied only when this flag is enabled. This flag is set to false by default.
NOTE: This commit is the initial patch in an effort to make Polly interact with the Loop Vectorizer via metadata.
>From 337ef3625408826e3359be89d36aa5062bd22088 Mon Sep 17 00:00:00 2001
From: Karthika Devi C <quic_kartc at quicinc.com>
Date: Mon, 22 Apr 2024 03:18:08 -0700
Subject: [PATCH] [Polly] Add vectorize metadata to loops identified as
vectorizable by polly
This patch introduces the initial implementation for annotating
loops created by Polly. Polly generates RunTimeChecks (RTCs), which
result in loop versioning. Specifically, the loop created by Polly
is executed when the RTCs pass; otherwise, the original loop is executed.
This patch adds the "llvm.loop.vectorize.enable" metadata, setting
it to true for loops created by Polly. It also disables vectorization
for the original fallback loop.
This behavior is controlled by the 'polly-annotate-metadata-vectorize'
flag, and the annotations are applied only when this flag is enabled.
This flag is set to false by default.
NOTE: This commit is the initial patch in an effort to make Polly
interact with the Loop Vectorizer via metadata.
---
polly/include/polly/CodeGen/IRBuilder.h | 3 +-
polly/lib/CodeGen/CodeGeneration.cpp | 25 ++++++++
polly/lib/CodeGen/IRBuilder.cpp | 36 ++++++-----
polly/lib/CodeGen/LoopGenerators.cpp | 18 +++++-
.../CodeGen/Metadata/basic_vec_annotate.ll | 61 +++++++++++++++++++
5 files changed, 126 insertions(+), 17 deletions(-)
create mode 100644 polly/test/CodeGen/Metadata/basic_vec_annotate.ll
diff --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index ffca887fbc09aa..73571dbcf1f2bd 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -60,7 +60,8 @@ class ScopAnnotator {
/// Annotate the loop latch @p B wrt. @p L.
void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel,
- bool IsLoopVectorizerDisabled) const;
+ bool setVectorizeMetadata,
+ bool EnableLoopVectorizer) const;
/// Add alternative alias based pointers
///
diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp
index 8813cfd959ef6e..bfc4014c848d5b 100644
--- a/polly/lib/CodeGen/CodeGeneration.cpp
+++ b/polly/lib/CodeGen/CodeGeneration.cpp
@@ -54,6 +54,11 @@ static cl::opt<bool> Verify("polly-codegen-verify",
cl::desc("Verify the function generated by Polly"),
cl::Hidden, cl::cat(PollyCategory));
+cl::opt<bool> PollyVectorizeMetadata(
+ "polly-annotate-metadata-vectorize",
+ cl::desc("Append vectorize enable/disable metadata from polly"),
+ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
bool polly::PerfMonitoring;
static cl::opt<bool, true>
@@ -233,6 +238,26 @@ static bool generateCode(Scop &S, IslAstInfo &AI, LoopInfo &LI,
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
+ // The code below sets "llvm.loop.vectorize.enable" to false on the
+ // original loops, i.e. the code path taken when the RTCs fail, because
+ // we don't want the Loop Vectorizer to come in later and vectorize the
+ // original fallback loop when 'polly-annotate-metadata-vectorize' is passed.
+ if (PollyVectorizeMetadata) {
+ for (Loop *L : LI.getLoopsInPreorder()) {
+ if (S.contains(L)) {
+ Annotator.pushLoop(L, false);
+ SmallVector<BasicBlock *, 4> LoopLatchBlocks;
+ L->getLoopLatches(LoopLatchBlocks);
+ for (BasicBlock *ControlBB : LoopLatchBlocks) {
+ BranchInst *Br = dyn_cast<BranchInst>(ControlBB->getTerminator());
+ if (Br)
+ Annotator.annotateLoopLatch(Br, L, false, true, false);
+ }
+ Annotator.popLoop(false);
+ }
+ }
+ }
+
if (PerfMonitoring) {
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 2285c746912f4e..4e01b86da563a8 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -128,8 +128,25 @@ void ScopAnnotator::popLoop(bool IsParallel) {
LoopAttrEnv.pop_back();
}
+void addVectorizeMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
+ bool EnableLoopVectorizer) {
+ MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+ ConstantInt *Value =
+ ConstantInt::get(Type::getInt1Ty(Ctx), EnableLoopVectorizer);
+ ValueAsMetadata *PropValue = ValueAsMetadata::get(Value);
+ Args->push_back(MDNode::get(Ctx, {PropName, PropValue}));
+}
+
+void addParallelMetadata(LLVMContext &Ctx, SmallVector<Metadata *, 3> *Args,
+ llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops) {
+ MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+ MDNode *AccGroup = ParallelLoops.back();
+ Args->push_back(MDNode::get(Ctx, {PropName, AccGroup}));
+}
+
void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
- bool IsLoopVectorizerDisabled) const {
+ bool setVectorizeMetadata,
+ bool EnableLoopVectorizer) const {
LLVMContext &Ctx = SE->getContext();
SmallVector<Metadata *, 3> Args;
@@ -145,19 +162,10 @@ void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
if (MData)
llvm::append_range(Args, drop_begin(MData->operands(), 1));
}
-
- if (IsLoopVectorizerDisabled) {
- MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
- ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
- ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
- Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
- }
-
- if (IsParallel) {
- MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
- MDNode *AccGroup = ParallelLoops.back();
- Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
- }
+ if (IsParallel)
+ addParallelMetadata(Ctx, &Args, ParallelLoops);
+ if (setVectorizeMetadata)
+ addVectorizeMetadata(Ctx, &Args, EnableLoopVectorizer);
// No metadata to annotate.
if (!MData && Args.size() <= 1)
diff --git a/polly/lib/CodeGen/LoopGenerators.cpp b/polly/lib/CodeGen/LoopGenerators.cpp
index b4f8bb8948c282..c082b61096250d 100644
--- a/polly/lib/CodeGen/LoopGenerators.cpp
+++ b/polly/lib/CodeGen/LoopGenerators.cpp
@@ -14,6 +14,7 @@
#include "polly/CodeGen/LoopGenerators.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
+#include "polly/ScopInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -35,6 +36,8 @@ static cl::opt<int, true>
cl::Hidden, cl::location(polly::PollyNumThreads),
cl::init(0), cl::cat(PollyCategory));
+extern cl::opt<bool> PollyVectorizeMetadata;
+
static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
"polly-scheduling",
cl::desc("Scheduling type of parallel OpenMP for loops"),
@@ -159,8 +162,19 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
// Create the loop latch and annotate it as such.
BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
- if (Annotator)
- Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
+
+ // If the 'polly-annotate-metadata-vectorize' flag is passed, we add
+ // the vectorize metadata. Otherwise we fall back to previous behavior
+ // of annotating the loop only when LoopVectDisabled is true.
+ if (Annotator) {
+ if (PollyVectorizeMetadata)
+ Annotator->annotateLoopLatch(B, NewLoop, Parallel, true,
+ !LoopVectDisabled);
+ else if (LoopVectDisabled)
+ Annotator->annotateLoopLatch(B, NewLoop, Parallel, true, false);
+ else
+ Annotator->annotateLoopLatch(B, NewLoop, Parallel, false, false);
+ }
IV->addIncoming(IncrementedIV, HeaderBB);
if (GuardBB)
diff --git a/polly/test/CodeGen/Metadata/basic_vec_annotate.ll b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
new file mode 100644
index 00000000000000..b4b1a5c8160264
--- /dev/null
+++ b/polly/test/CodeGen/Metadata/basic_vec_annotate.ll
@@ -0,0 +1,61 @@
+; RUN: opt %loadNPMPolly -S -passes=polly-codegen -polly-annotate-metadata-vectorize < %s | FileCheck %s
+
+; Basic verification of vectorize metadata getting added when
+; "-polly-annotate-metadata-vectorize" is passed.
+
+; void add(int *A, int *B, int *C,int n) {
+; for(int i=0; i<n; i++)
+; C[i] += A[i] + B[i];
+; }
+
+; CHECK: for.body:
+; CHECK: br {{.*}} !llvm.loop [[LOOP:![0-9]+]]
+; CHECK: polly.stmt.for.body:
+; CHECK: br {{.*}} !llvm.loop [[POLLY_LOOP:![0-9]+]]
+; CHECK: [[LOOP]] = distinct !{[[LOOP]], [[META2:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.vectorize.enable", i1 false}
+; CHECK: [[POLLY_LOOP]] = distinct !{[[POLLY_LOOP]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.vectorize.enable", i1 true}
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
+define dso_local void @add(ptr nocapture noundef readonly %A, ptr nocapture noundef readonly %B, ptr nocapture noundef %C, i32 noundef %n) local_unnamed_addr #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader: ; preds = %entry.split
+ %wide.trip.count = zext nneg i32 %n to i64
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry.split
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
+ %2 = load i32, ptr %arrayidx4, align 4
+ %add5 = add nsw i32 %add, %2
+ store i32 %add5, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a57" "target-features"="+aes,+crc,+fp-armv8,+neon,+outline-atomics,+perfmon,+sha2,+v8a,-fmv" }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.mustprogress"}
More information about the llvm-commits
mailing list