[polly] b85c98b - [Polly][Codegen] Emit access group metadata.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 4 02:00:20 PST 2021


Author: Michael Kruse
Date: 2021-03-04T03:58:03-06:00
New Revision: b85c98b4c5734b8570e1392fb15d2f9fc2581d6e

URL: https://github.com/llvm/llvm-project/commit/b85c98b4c5734b8570e1392fb15d2f9fc2581d6e
DIFF: https://github.com/llvm/llvm-project/commit/b85c98b4c5734b8570e1392fb15d2f9fc2581d6e.diff

LOG: [Polly][Codegen] Emit access group metadata.

Emit llvm.loop.parallel_accesses metadata instead of
llvm.mem.parallel_loop_access. The latter is deprecated because it
assumes that LoopIDs are persistent, which they are not.
We also emit parallel access metadata for all surrounding parallel
loops, not just the innermost parallel loop.

Added: 
    

Modified: 
    polly/include/polly/CodeGen/IRBuilder.h
    polly/lib/CodeGen/IRBuilder.cpp
    polly/test/CodeGen/stride_detection.ll
    polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
    polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
    polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
    polly/test/Isl/CodeGen/getNumberOfIterations.ll

Removed: 
    


################################################################################
diff  --git a/polly/include/polly/CodeGen/IRBuilder.h b/polly/include/polly/CodeGen/IRBuilder.h
index fb5d14a8b88b..94b86432952d 100644
--- a/polly/include/polly/CodeGen/IRBuilder.h
+++ b/polly/include/polly/CodeGen/IRBuilder.h
@@ -100,7 +100,7 @@ class ScopAnnotator {
   /// All loops currently under construction.
   llvm::SmallVector<llvm::Loop *, 8> ActiveLoops;
 
-  /// Metadata pointing to parallel loops currently under construction.
+  /// Access groups for the parallel loops currently under construction.
   llvm::SmallVector<llvm::MDNode *, 8> ParallelLoops;
 
   /// The alias scope domain for the current SCoP.

diff  --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 001a907a834f..374f2ea7211f 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -94,51 +94,52 @@ void ScopAnnotator::buildAliasScopes(Scop &S) {
 }
 
 void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
-
   ActiveLoops.push_back(L);
-  if (!IsParallel)
-    return;
 
-  BasicBlock *Header = L->getHeader();
-  MDNode *Id = getID(Header->getContext());
-  assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
-  assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
-  MDNode *Ids = ParallelLoops.empty()
-                    ? Id
-                    : MDNode::concatenate(ParallelLoops.back(), Id);
-  ParallelLoops.push_back(Ids);
+  if (IsParallel) {
+    LLVMContext &Ctx = SE->getContext();
+    MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
+    ParallelLoops.push_back(AccessGroup);
+  }
 }
 
 void ScopAnnotator::popLoop(bool IsParallel) {
   ActiveLoops.pop_back();
-  if (!IsParallel)
-    return;
 
-  assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
-  ParallelLoops.pop_back();
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
+    ParallelLoops.pop_back();
+  }
 }
 
 void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
                                       bool IsLoopVectorizerDisabled) const {
-  MDNode *MData = nullptr;
+  LLVMContext &Ctx = SE->getContext();
+  SmallVector<Metadata *, 3> Args;
+
+  // For the LoopID self-reference.
+  Args.push_back(nullptr);
 
   if (IsLoopVectorizerDisabled) {
-    SmallVector<Metadata *, 3> Args;
-    LLVMContext &Ctx = SE->getContext();
-    Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable"));
-    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
-    Args.push_back(ValueAsMetadata::get(FalseValue));
-    MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args)));
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.vectorize.enable");
+    ConstantInt *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
+    ValueAsMetadata *PropValue = ValueAsMetadata::get(FalseValue);
+    Args.push_back(MDNode::get(Ctx, {PropName, PropValue}));
   }
 
   if (IsParallel) {
-    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
-    MDNode *Ids = ParallelLoops.back();
-    MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
-    MData = MDNode::concatenate(MData, Id);
+    MDString *PropName = MDString::get(Ctx, "llvm.loop.parallel_accesses");
+    MDNode *AccGroup = ParallelLoops.back();
+    Args.push_back(MDNode::get(Ctx, {PropName, AccGroup}));
   }
 
-  B->setMetadata("llvm.loop", MData);
+  // No metadata to annotate.
+  if (Args.size() <= 1)
+    return;
+
+  MDNode *MData = MDNode::getDistinct(Ctx, Args);
+  MData->replaceOperandWith(0, MData);
+  B->setMetadata(LLVMContext::MD_loop, MData);
 }
 
 /// Get the pointer operand
@@ -214,8 +215,24 @@ void ScopAnnotator::annotate(Instruction *Inst) {
   if (!Inst->mayReadOrWriteMemory())
     return;
 
-  if (!ParallelLoops.empty())
-    Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
+  switch (ParallelLoops.size()) {
+  case 0:
+    // Not parallel to anything: no access group needed.
+    break;
+  case 1:
+    // Single parallel loop: use directly.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      cast<MDNode>(ParallelLoops.front()));
+    break;
+  default:
+    // Parallel to multiple loops: refer to list of access groups.
+    Inst->setMetadata(LLVMContext::MD_access_group,
+                      MDNode::get(SE->getContext(),
+                                  ArrayRef<Metadata *>(
+                                      (Metadata *const *)ParallelLoops.data(),
+                                      ParallelLoops.size())));
+    break;
+  }
 
   // TODO: Use the ScopArrayInfo once available here.
   if (!AliasScopeDomain)

diff  --git a/polly/test/CodeGen/stride_detection.ll b/polly/test/CodeGen/stride_detection.ll
index da0abb085687..0bbaaa369881 100644
--- a/polly/test/CodeGen/stride_detection.ll
+++ b/polly/test/CodeGen/stride_detection.ll
@@ -10,13 +10,13 @@
 ;             Stmt_for_body_3(32 * c0 + 4 * c2 + c4, 32 * c1 + c3);
 
 ; CHECK: polly.stmt.for.body.3:                            ; preds = %polly.loop_header18
-; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !1, !noalias !3, !llvm.mem.parallel_loop_access !0
-; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   %_p_splat_one = load <1 x double>, <1 x double>* %_p_vec_p, align 8, !alias.scope !3, !noalias !5, !llvm.access.group !2
+; CHECK:   %_p_vec_full = load <4 x double>, <4 x double>* %vector_ptr, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 0
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 1
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 2
 ; CHECK:   extractelement <4 x double> %addp_vec, i32 3
-; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !4, !noalias !5, !llvm.mem.parallel_loop_access !0
+; CHECK:   store <4 x double> %addp_vec, <4 x double>* {{.*}}, align 8, !alias.scope !6, !noalias !7, !llvm.access.group !2
 
 define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, [1024 x double]* %C, [1024 x double]* %A) #0 {
 entry:

diff  --git a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
index 49798ba3a284..617aaa755535 100644
--- a/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
+++ b/polly/test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll
@@ -8,11 +8,17 @@
 ; CHECK-DAG:  %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar_next{{[0-9]*}}, 511
 ; CHECK-DAG:  br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]]
 ;
-; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4
+; CHECK-DAG: store i32 %{{[a-z_0-9]*}}, i32* %{{[a-z_0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPLST6:[0-9]+]]
+;
+; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]], ![[ACCGROUP1:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP1]] = !{!"llvm.loop.parallel_accesses", ![[GROUP2:[0-9]+]]}
+; CHECK-DAG: ![[GROUP2]] = distinct !{}
+; CHECK-DAG: ![[GROUPLST6]] = !{![[GROUP2]], ![[GROUP7:[0-9]+]]}
+; CHECK-DAG: ![[GROUP7]] = distinct !{}
+; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]], ![[ACCGROUP9:[0-9]+]]}
+; CHECK-DAG: ![[ACCGROUP9]] = !{!"llvm.loop.parallel_accesses", ![[GROUP7]]}
+
 ;
-; CHECK-DAG: ![[IDOuter]] = distinct !{![[IDOuter]]}
-; CHECK-DAG: ![[IDInner]] = distinct !{![[IDInner]]}
-; CHECK-DAG: !4 = !{![[IDOuter]], ![[IDInner]]}
 ;
 ;    void jd(int *A) {
 ;      for (int i = 0; i < 1024; i++)

diff  --git a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
index 7555d84de74d..ce96eefbbf51 100644
--- a/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
+++ b/polly/test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll
@@ -33,13 +33,15 @@ ret:
   ret void
 }
 
-; SEQUENTIAL: @test-one
+; SEQUENTIAL-LABEL: @test-one
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-one
-; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID3:[0-9]+]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID4:[0-9]+]]
+
 
 ; This loop has memory dependences that require at least a simple dependence
 ; analysis to detect the parallelism.
@@ -76,11 +78,18 @@ ret:
   ret void
 }
 
-; SEQUENTIAL: @test-two
+; SEQUENTIAL-LABEL: @test-two
 ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access
+; SEQUENTIAL-NOT: !llvm.access.group
 ; SEQUENTIAL-NOT: !llvm.loop
 
 ; PARALLEL: @test-two
-; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]]
-; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]]
-; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]]
+; PARALLEL: %val_p_scalar_ = load i32, i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8:[0-9]*]]
+; PARALLEL: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.access.group ![[GROUPID8]]
+; PARALLEL:  br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID9:[0-9]*]]
+
+
+; PARALLEL: ![[LoopID4]] = distinct !{![[LoopID4]], ![[PARACC5:[0-9]+]]}
+; PARALLEL: ![[PARACC5]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID3]]}
+; PARALLEL: ![[LoopID9]] = distinct !{![[LoopID9]], ![[PARACC10:[0-9]+]]}
+; PARALLEL: ![[PARACC10]] = !{!"llvm.loop.parallel_accesses", ![[GROUPID8]]}

diff  --git a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
index 02dce821c063..b28a914fc730 100644
--- a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
+++ b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll
@@ -23,13 +23,13 @@
 ; IR: %6 = add nsw i64 %polly.indvar5, 13
 ; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6
 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
-; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 ; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m
 ; IR: %7 = add nsw i64 %polly.indvar5, 43
 ; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7
 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
-; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
+; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @new_multidim_access(i64 %n, i64 %m, float* %A) {

diff  --git a/polly/test/Isl/CodeGen/getNumberOfIterations.ll b/polly/test/Isl/CodeGen/getNumberOfIterations.ll
index dc7ad9d3a5ec..a1eec2dc5e2d 100644
--- a/polly/test/Isl/CodeGen/getNumberOfIterations.ll
+++ b/polly/test/Isl/CodeGen/getNumberOfIterations.ll
@@ -8,9 +8,9 @@
 ; CHECK: polly.stmt.if.then:                               ; preds = %polly.loop_header
 ; CHECK:   %p_conv = sitofp i64 %polly.indvar to float
 ; CHECK:   %scevgep = getelementptr float, float* %A, i64 %polly.indvar
-; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   %_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 ; CHECK:   %p_add = fadd float %p_conv, %_p_scalar_
-; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
+; CHECK:   store float %p_add, float* %scevgep, align 4, !alias.scope !0, !noalias !2, !llvm.access.group !3
 
 define void @foo(float* %A, i64 %N) #0 {
 entry:


        


More information about the llvm-commits mailing list