[polly] r298510 - Introduce another level of metadata to distinguish non-aliasing accesses

Roman Gareev via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 22 07:25:24 PDT 2017


Author: romangareev
Date: Wed Mar 22 09:25:24 2017
New Revision: 298510

URL: http://llvm.org/viewvc/llvm-project?rev=298510&view=rev
Log:
Introduce another level of metadata to distinguish non-aliasing accesses

Introduce another level of alias metadata to distinguish the individual
non-aliasing accesses whose base pointers are inter-iteration alias-free,
i.e., marked with "Inter iteration alias-free" mark nodes. This can be used,
for example, to distinguish the different stores (loads) produced by
unrolling the innermost loops, so that LICM can then sink (hoist) them.

Reviewed-by: Tobias Grosser <tobias at grosser.es>

Differential Revision: https://reviews.llvm.org/D30606

Added:
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
Modified:
    polly/trunk/include/polly/CodeGen/IRBuilder.h
    polly/trunk/lib/CodeGen/IRBuilder.cpp
    polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp
    polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll

Modified: polly/trunk/include/polly/CodeGen/IRBuilder.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IRBuilder.h?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IRBuilder.h (original)
+++ polly/trunk/include/polly/CodeGen/IRBuilder.h Wed Mar 22 09:25:24 2017
@@ -80,7 +80,20 @@ public:
   /// Delete the set of alternative alias bases
   void resetAlternativeAliasBases() { AlternativeAliasBases.clear(); }
 
+  /// Add inter iteration alias-free base pointer @p BasePtr.
+  void addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr);
+
 private:
+  /// Annotate with the second level alias metadata
+  ///
+  /// Annotate the instruction @p I with the second level alias metadata
+  /// to distinguish the individual non-aliasing accesses that have inter
+  /// iteration alias-free base pointers.
+  ///
+  /// @param I The instruction to be annotated.
+  /// @param BasePtr The base pointer of @p I.
+  void annotateSecondLevel(llvm::Instruction *I, llvm::Value *BasePtr);
+
   /// The ScalarEvolution analysis we use to find base pointers.
   llvm::ScalarEvolution *SE;
 
@@ -100,6 +113,17 @@ private:
   llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
       OtherAliasScopeListMap;
 
+  /// A map from pointers to second level alias scopes.
+  llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+      SecondLevelAliasScopeMap;
+
+  /// A map from pointers to second level alias scope list of other pointers.
+  llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+      SecondLevelOtherAliasScopeListMap;
+
+  /// Inter iteration alias-free base pointers.
+  llvm::SmallPtrSet<llvm::Value *, 4> InterIterationAliasFreeBasePtrs;
+
   llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>>
       AlternativeAliasBases;
 };

Modified: polly/trunk/lib/CodeGen/IRBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IRBuilder.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IRBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IRBuilder.cpp Wed Mar 22 09:25:24 2017
@@ -115,6 +115,49 @@ void ScopAnnotator::annotateLoopLatch(Br
   B->setMetadata("llvm.loop", Id);
 }
 
+/// Get the pointer operand of a memory access instruction.
+///
+/// @param Inst The instruction to be analyzed.
+/// @return The pointer operand if MemAccInst::dyn_cast recognizes @p Inst as
+///         a memory access instruction, and nullptr otherwise.
+static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
+  auto MemInst = MemAccInst::dyn_cast(Inst);
+  if (!MemInst) // Not a memory access instruction.
+    return nullptr;
+
+  return MemInst.getPointerOperand();
+}
+
+void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
+                                        llvm::Value *BasePtr) {
+  auto *Ptr = getMemAccInstPointerOperand(Inst);
+  if (!Ptr) // Not a memory access instruction; nothing to annotate.
+    return;
+  auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(Ptr);
+  auto SecondLevelOtherAliasScopeList =
+      SecondLevelOtherAliasScopeListMap.lookup(Ptr);
+  if (!SecondLevelAliasScope) { // No scope cached for Ptr yet: create one.
+    auto AliasScope = AliasScopeMap.lookup(BasePtr);
+    if (!AliasScope) // BasePtr has no first level scope: bail out.
+      return;
+    LLVMContext &Ctx = SE->getContext();
+    SecondLevelAliasScope = getID(
+        Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
+    SecondLevelAliasScopeMap[Ptr] = SecondLevelAliasScope;
+    Metadata *Args = {SecondLevelAliasScope};
+    auto SecondLevelBasePtrAliasScopeList =
+        SecondLevelAliasScopeMap.lookup(BasePtr); // Scopes created so far.
+    SecondLevelAliasScopeMap[BasePtr] = MDNode::concatenate(
+        SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
+    auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
+    SecondLevelOtherAliasScopeList = MDNode::concatenate(
+        OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
+    SecondLevelOtherAliasScopeListMap[Ptr] = SecondLevelOtherAliasScopeList;
+  } // noalias = first level other scopes + earlier second level scopes.
+  Inst->setMetadata("alias.scope", SecondLevelAliasScope);
+  Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
+}
+
 void ScopAnnotator::annotate(Instruction *Inst) {
   if (!Inst->mayReadOrWriteMemory())
     return;
@@ -126,11 +169,7 @@ void ScopAnnotator::annotate(Instruction
   if (!AliasScopeDomain)
     return;
 
-  auto MemInst = MemAccInst::dyn_cast(Inst);
-  if (!MemInst)
-    return;
-
-  auto *Ptr = MemInst.getPointerOperand();
+  auto *Ptr = getMemAccInstPointerOperand(Inst);
   if (!Ptr)
     return;
 
@@ -162,6 +201,18 @@ void ScopAnnotator::annotate(Instruction
          "BasePtr either expected in AliasScopeMap and OtherAlias...Map");
   auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
 
+  if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
+    annotateSecondLevel(Inst, BasePtr);
+    return;
+  }
+
   Inst->setMetadata("alias.scope", AliasScope);
   Inst->setMetadata("noalias", OtherAliasScopeList);
 }
+
+void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
+  if (!BasePtr) // Ignore null so that the set never contains nullptr.
+    return;
+
+  InterIterationAliasFreeBasePtrs.insert(BasePtr);
+}

Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Wed Mar 22 09:25:24 2017
@@ -381,6 +381,10 @@ void IslNodeBuilder::createMark(__isl_ta
     isl_id_free(Id);
     return;
   }
+  if (!strcmp(isl_id_get_name(Id), "Inter iteration alias-free")) {
+    auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
+    Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
+  }
   create(Child);
   isl_id_free(Id);
 }

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Wed Mar 22 09:25:24 2017
@@ -1248,10 +1248,26 @@ isolateAndUnrollMatMulInnerLoops(__isl_t
   return Node;
 }
 
+/// Insert an "Inter iteration alias-free" mark node carrying @p BasePtr.
+///
+/// @param Node The node that becomes the child of the inserted mark node.
+/// @param BasePtr The pointer stored as the user pointer of the mark's isl_id.
+/// @return Child 0 of the new mark, or @p Node unchanged if @p BasePtr is null.
+static isl_schedule_node *markInterIterationAliasFree(isl_schedule_node *Node,
+                                                      llvm::Value *BasePtr) {
+  if (!BasePtr) // Nothing to mark.
+    return Node;
+
+  auto *Ctx = isl_schedule_node_get_ctx(Node);
+  auto *Id = isl_id_alloc(Ctx, "Inter iteration alias-free", BasePtr);
+  return isl_schedule_node_child(isl_schedule_node_insert_mark(Node, Id), 0);
+}
+
 __isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
     __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI,
     MatMulInfoTy &MMI) {
   assert(TTI && "The target transform info should be provided.");
+  Node = markInterIterationAliasFree(Node, MMI.WriteToC->getLatestBaseAddr());
   int DimOutNum = isl_schedule_node_band_n_member(Node);
   assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest "
                           "and, consequently, the corresponding scheduling "

Modified: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll Wed Mar 22 09:25:24 2017
@@ -27,6 +27,7 @@
 ; CHECK-NEXT:            for (int c3 = 0; c3 <= 31; c3 += 1)
 ; CHECK-NEXT:              Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
 ; CHECK-NEXT:        }
+; CHECK-NEXT:      // Inter iteration alias-free
 ; CHECK-NEXT:      // 1st level tiling - Tiles
 ; CHECK-NEXT:      for (int c1 = 0; c1 <= 3; c1 += 1) {
 ; CHECK-NEXT:        for (int c3 = 0; c3 <= 1055; c3 += 1)

Added: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll?rev=298510&view=auto
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll (added)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll Wed Mar 22 09:25:24 2017
@@ -0,0 +1,70 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-invariant-load-hoisting=true \
+; RUN: -polly-pattern-matching-based-opts=true \
+; RUN: -polly-target-throughput-vector-fma=1 \
+; RUN: -polly-target-latency-vector-fma=1 \
+; RUN: -polly-codegen -polly-target-1st-cache-level-associativity=8 \
+; RUN: -polly-target-2nd-cache-level-associativity=8 \
+; RUN: -polly-target-1st-cache-level-size=32768 \
+; RUN: -polly-target-vector-register-bitwidth=256 \
+; RUN: -polly-target-2nd-cache-level-size=262144 -S < %s \
+; RUN: | FileCheck %s
+;
+; This test case checks whether Polly generates second level alias metadata
+; to distinguish the specific accesses in case of the ublas gemm kernel.
+;
+; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) {
+bb:
+  br label %bb8
+
+bb8:                                              ; preds = %bb29, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
+  br label %bb9
+
+bb9:                                              ; preds = %bb26, %bb8
+  %tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
+  %tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
+  %tmp12 = load double, double* %tmp11, align 8
+  %tmp13 = fmul double %tmp12, %arg4
+  store double %tmp13, double* %tmp11, align 8
+  br label %Copy_0
+
+Copy_0:                                             ; preds = %Copy_0, %bb9
+  %tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
+  %tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
+  %tmp17 = load double, double* %tmp16, align 8
+  %tmp18 = fmul double %tmp17, %arg3
+  %tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
+  %tmp20 = load double, double* %tmp19, align 8
+  %tmp21 = fmul double %tmp18, %tmp20
+  %tmp22 = load double, double* %tmp11, align 8
+  %tmp23 = fadd double %tmp22, %tmp21
+  store double %tmp23, double* %tmp11, align 8
+  %tmp24 = add nuw nsw i64 %tmp15, 1
+  %tmp25 = icmp ne i64 %tmp24, 1024
+  br i1 %tmp25, label %Copy_0, label %bb26
+
+bb26:                                             ; preds = %Copy_0
+  %tmp27 = add nuw nsw i64 %tmp10, 1
+  %tmp28 = icmp ne i64 %tmp27, 1056
+  br i1 %tmp28, label %bb9, label %bb29
+
+bb29:                                             ; preds = %bb26
+  %tmp30 = add nuw nsw i64 %tmp, 1
+  %tmp31 = icmp ne i64 %tmp30, 1056
+  br i1 %tmp31, label %bb8, label %bb32
+
+bb32:                                             ; preds = %bb29
+  ret void
+}

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll Wed Mar 22 09:25:24 2017
@@ -33,6 +33,7 @@
 ; CHECK-NEXT:            for (int c3 = 0; c3 <= 31; c3 += 1)
 ; CHECK-NEXT:              Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
 ; CHECK-NEXT:        }
+; CHECK-NEXT:      // Inter iteration alias-free
 ; CHECK-NEXT:      // Register tiling - Tiles
 ; CHECK-NEXT:      for (int c0 = 0; c0 <= 131; c0 += 1)
 ; CHECK-NEXT:        for (int c1 = 0; c1 <= 263; c1 += 1)
@@ -84,6 +85,7 @@
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:            for (int c3 = 0; c3 <= 31; c3 += 1)
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:              Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:        }
+; EXTRACTION-OF-MACRO-KERNEL-NEXT:      // Inter iteration alias-free
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:      // 1st level tiling - Tiles
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:      for (int c1 = 0; c1 <= 3; c1 += 1) {
 ; EXTRACTION-OF-MACRO-KERNEL-NEXT:        for (int c3 = 0; c3 <= 1055; c3 += 1)

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll Wed Mar 22 09:25:24 2017
@@ -37,6 +37,7 @@
 ;	   C[i][j] += A[i][k] * B[k][j];
 ;
 ; CHECK:    if (ni >= 1) {
+; CHECK-NEXT:      // Inter iteration alias-free
 ; CHECK-NEXT:      // 1st level tiling - Tiles
 ; CHECK-NEXT:      for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
 ; CHECK-NEXT:        for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {




More information about the llvm-commits mailing list