[polly] r298510 - Introduce another level of metadata to distinguish non-aliasing accesses
Roman Gareev via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 22 07:25:24 PDT 2017
Author: romangareev
Date: Wed Mar 22 09:25:24 2017
New Revision: 298510
URL: http://llvm.org/viewvc/llvm-project?rev=298510&view=rev
Log:
Introduce another level of metadata to distinguish non-aliasing accesses
Introduce another level of alias metadata to distinguish the individual
non-aliasing accesses that have inter iteration alias-free base pointers
marked with "Inter iteration alias-free" mark nodes. It can be used to,
for example, distinguish different stores (loads) produced by unrolling of
the innermost loops and, subsequently, sink (hoist) them by LICM.
Reviewed-by: Tobias Grosser <tobias at grosser.es>
Differential Revision: https://reviews.llvm.org/D30606
Added:
polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
Modified:
polly/trunk/include/polly/CodeGen/IRBuilder.h
polly/trunk/lib/CodeGen/IRBuilder.cpp
polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
polly/trunk/lib/Transform/ScheduleOptimizer.cpp
polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
Modified: polly/trunk/include/polly/CodeGen/IRBuilder.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/IRBuilder.h?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/IRBuilder.h (original)
+++ polly/trunk/include/polly/CodeGen/IRBuilder.h Wed Mar 22 09:25:24 2017
@@ -80,7 +80,20 @@ public:
/// Delete the set of alternative alias bases
void resetAlternativeAliasBases() { AlternativeAliasBases.clear(); }
+ /// Add inter iteration alias-free base pointer @p BasePtr.
+ void addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr);
+
private:
+ /// Annotate with the second level alias metadata
+ ///
+ /// Annotate the instruction @p I with the second level alias metadata
+ /// to distinguish the individual non-aliasing accesses that have inter
+ /// iteration alias-free base pointers.
+ ///
+ /// @param I The instruction to be annotated.
+ /// @param BasePtr The base pointer of @p I.
+ void annotateSecondLevel(llvm::Instruction *I, llvm::Value *BasePtr);
+
/// The ScalarEvolution analysis we use to find base pointers.
llvm::ScalarEvolution *SE;
@@ -100,6 +113,17 @@ private:
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
OtherAliasScopeListMap;
+ /// A map from pointers to second level alias scopes.
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+ SecondLevelAliasScopeMap;
+
+ /// A map from pointers to second level alias scope list of other pointers.
+ llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::MDNode *>
+ SecondLevelOtherAliasScopeListMap;
+
+ /// Inter iteration alias-free base pointers.
+ llvm::SmallPtrSet<llvm::Value *, 4> InterIterationAliasFreeBasePtrs;
+
llvm::DenseMap<llvm::AssertingVH<llvm::Value>, llvm::AssertingVH<llvm::Value>>
AlternativeAliasBases;
};
Modified: polly/trunk/lib/CodeGen/IRBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IRBuilder.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IRBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IRBuilder.cpp Wed Mar 22 09:25:24 2017
@@ -115,6 +115,49 @@ void ScopAnnotator::annotateLoopLatch(Br
B->setMetadata("llvm.loop", Id);
}
+/// Get the pointer operand
+///
+/// @param Inst The instruction to be analyzed.
+/// @return the pointer operand in case @p Inst is a memory access
+/// instruction and nullptr otherwise.
+static llvm::Value *getMemAccInstPointerOperand(Instruction *Inst) {
+ auto MemInst = MemAccInst::dyn_cast(Inst);
+ if (!MemInst)
+ return nullptr;
+
+ return MemInst.getPointerOperand();
+}
+
+void ScopAnnotator::annotateSecondLevel(llvm::Instruction *Inst,
+ llvm::Value *BasePtr) {
+ auto *Ptr = getMemAccInstPointerOperand(Inst);
+ if (!Ptr)
+ return;
+ auto SecondLevelAliasScope = SecondLevelAliasScopeMap.lookup(Ptr);
+ auto SecondLevelOtherAliasScopeList =
+ SecondLevelOtherAliasScopeListMap.lookup(Ptr);
+ if (!SecondLevelAliasScope) {
+ auto AliasScope = AliasScopeMap.lookup(BasePtr);
+ if (!AliasScope)
+ return;
+ LLVMContext &Ctx = SE->getContext();
+ SecondLevelAliasScope = getID(
+ Ctx, AliasScope, MDString::get(Ctx, "second level alias metadata"));
+ SecondLevelAliasScopeMap[Ptr] = SecondLevelAliasScope;
+ Metadata *Args = {SecondLevelAliasScope};
+ auto SecondLevelBasePtrAliasScopeList =
+ SecondLevelAliasScopeMap.lookup(BasePtr);
+ SecondLevelAliasScopeMap[BasePtr] = MDNode::concatenate(
+ SecondLevelBasePtrAliasScopeList, MDNode::get(Ctx, Args));
+ auto OtherAliasScopeList = OtherAliasScopeListMap.lookup(BasePtr);
+ SecondLevelOtherAliasScopeList = MDNode::concatenate(
+ OtherAliasScopeList, SecondLevelBasePtrAliasScopeList);
+ SecondLevelOtherAliasScopeListMap[Ptr] = SecondLevelOtherAliasScopeList;
+ }
+ Inst->setMetadata("alias.scope", SecondLevelAliasScope);
+ Inst->setMetadata("noalias", SecondLevelOtherAliasScopeList);
+}
+
void ScopAnnotator::annotate(Instruction *Inst) {
if (!Inst->mayReadOrWriteMemory())
return;
@@ -126,11 +169,7 @@ void ScopAnnotator::annotate(Instruction
if (!AliasScopeDomain)
return;
- auto MemInst = MemAccInst::dyn_cast(Inst);
- if (!MemInst)
- return;
-
- auto *Ptr = MemInst.getPointerOperand();
+ auto *Ptr = getMemAccInstPointerOperand(Inst);
if (!Ptr)
return;
@@ -162,6 +201,18 @@ void ScopAnnotator::annotate(Instruction
"BasePtr either expected in AliasScopeMap and OtherAlias...Map");
auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
+ if (InterIterationAliasFreeBasePtrs.count(BasePtr)) {
+ annotateSecondLevel(Inst, BasePtr);
+ return;
+ }
+
Inst->setMetadata("alias.scope", AliasScope);
Inst->setMetadata("noalias", OtherAliasScopeList);
}
+
+void ScopAnnotator::addInterIterationAliasFreeBasePtr(llvm::Value *BasePtr) {
+ if (!BasePtr)
+ return;
+
+ InterIterationAliasFreeBasePtrs.insert(BasePtr);
+}
Modified: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/IslNodeBuilder.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp (original)
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp Wed Mar 22 09:25:24 2017
@@ -381,6 +381,10 @@ void IslNodeBuilder::createMark(__isl_ta
isl_id_free(Id);
return;
}
+ if (!strcmp(isl_id_get_name(Id), "Inter iteration alias-free")) {
+ auto *BasePtr = static_cast<Value *>(isl_id_get_user(Id));
+ Annotator.addInterIterationAliasFreeBasePtr(BasePtr);
+ }
create(Child);
isl_id_free(Id);
}
Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Wed Mar 22 09:25:24 2017
@@ -1248,10 +1248,26 @@ isolateAndUnrollMatMulInnerLoops(__isl_t
return Node;
}
+/// Mark @p BasePtr with "Inter iteration alias-free" mark node.
+///
+/// @param Node The child of the mark node to be inserted.
+/// @param BasePtr The pointer to be marked.
+/// @return The modified isl_schedule_node.
+static isl_schedule_node *markInterIterationAliasFree(isl_schedule_node *Node,
+ llvm::Value *BasePtr) {
+ if (!BasePtr)
+ return Node;
+
+ auto *Ctx = isl_schedule_node_get_ctx(Node);
+ auto *Id = isl_id_alloc(Ctx, "Inter iteration alias-free", BasePtr);
+ return isl_schedule_node_child(isl_schedule_node_insert_mark(Node, Id), 0);
+}
+
__isl_give isl_schedule_node *ScheduleTreeOptimizer::optimizeMatMulPattern(
__isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI,
MatMulInfoTy &MMI) {
assert(TTI && "The target transform info should be provided.");
+ Node = markInterIterationAliasFree(Node, MMI.WriteToC->getLatestBaseAddr());
int DimOutNum = isl_schedule_node_band_n_member(Node);
assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest "
"and, consequently, the corresponding scheduling "
Modified: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll Wed Mar 22 09:25:24 2017
@@ -27,6 +27,7 @@
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
; CHECK-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
Added: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll?rev=298510&view=auto
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll (added)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_10.ll Wed Mar 22 09:25:24 2017
@@ -0,0 +1,70 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-invariant-load-hoisting=true \
+; RUN: -polly-pattern-matching-based-opts=true \
+; RUN: -polly-target-throughput-vector-fma=1 \
+; RUN: -polly-target-latency-vector-fma=1 \
+; RUN: -polly-codegen -polly-target-1st-cache-level-associativity=8 \
+; RUN: -polly-target-2nd-cache-level-associativity=8 \
+; RUN: -polly-target-1st-cache-level-size=32768 \
+; RUN: -polly-target-vector-register-bitwidth=256 \
+; RUN: -polly-target-2nd-cache-level-size=262144 -S < %s \
+; RUN: | FileCheck %s
+;
+; This test case checks whether Polly generates second level alias metadata
+; to distinguish the specific accesses in case of the ublas gemm kernel.
+;
+; CHECK: %tmp22_p_scalar_ = load double, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: store double %p_tmp23, double* %scevgep168, align 8, !alias.scope !10, !noalias !2
+; CHECK: %tmp22_p_scalar_188 = load double, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: store double %p_tmp23189, double* %scevgep187, align 8, !alias.scope !11, !noalias !12
+; CHECK: %tmp22_p_scalar_209 = load double, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: store double %p_tmp23210, double* %scevgep208, align 8, !alias.scope !13, !noalias !14
+; CHECK: %tmp22_p_scalar_230 = load double, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+; CHECK: store double %p_tmp23231, double* %scevgep229, align 8, !alias.scope !15, !noalias !16
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) {
+bb:
+ br label %bb8
+
+bb8: ; preds = %bb29, %bb
+ %tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
+ br label %bb9
+
+bb9: ; preds = %bb26, %bb8
+ %tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
+ %tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
+ %tmp12 = load double, double* %tmp11, align 8
+ %tmp13 = fmul double %tmp12, %arg4
+ store double %tmp13, double* %tmp11, align 8
+ br label %Copy_0
+
+Copy_0: ; preds = %Copy_0, %bb9
+ %tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
+ %tmp16 = getelementptr inbounds [1024 x double], [1024 x double]* %arg6, i64 %tmp, i64 %tmp15
+ %tmp17 = load double, double* %tmp16, align 8
+ %tmp18 = fmul double %tmp17, %arg3
+ %tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
+ %tmp20 = load double, double* %tmp19, align 8
+ %tmp21 = fmul double %tmp18, %tmp20
+ %tmp22 = load double, double* %tmp11, align 8
+ %tmp23 = fadd double %tmp22, %tmp21
+ store double %tmp23, double* %tmp11, align 8
+ %tmp24 = add nuw nsw i64 %tmp15, 1
+ %tmp25 = icmp ne i64 %tmp24, 1024
+ br i1 %tmp25, label %Copy_0, label %bb26
+
+bb26: ; preds = %Copy_0
+ %tmp27 = add nuw nsw i64 %tmp10, 1
+ %tmp28 = icmp ne i64 %tmp27, 1056
+ br i1 %tmp28, label %bb9, label %bb29
+
+bb29: ; preds = %bb26
+ %tmp30 = add nuw nsw i64 %tmp, 1
+ %tmp31 = icmp ne i64 %tmp30, 1056
+ br i1 %tmp31, label %bb8, label %bb32
+
+bb32: ; preds = %bb29
+ ret void
+}
Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll Wed Mar 22 09:25:24 2017
@@ -33,6 +33,7 @@
; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; CHECK-NEXT: }
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // Register tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= 131; c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= 263; c1 += 1)
@@ -84,6 +85,7 @@
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1)
; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
; EXTRACTION-OF-MACRO-KERNEL-NEXT: }
+; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Inter iteration alias-free
; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) {
; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1)
Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll?rev=298510&r1=298509&r2=298510&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll Wed Mar 22 09:25:24 2017
@@ -37,6 +37,7 @@
; C[i][j] += A[i][k] * B[k][j];
;
; CHECK: if (ni >= 1) {
+; CHECK-NEXT: // Inter iteration alias-free
; CHECK-NEXT: // 1st level tiling - Tiles
; CHECK-NEXT: for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {
More information about the llvm-commits
mailing list