[polly] r289815 - Restrict ranges of extension maps

Roman Gareev via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 15 04:35:59 PST 2016


Author: romangareev
Date: Thu Dec 15 06:35:59 2016
New Revision: 289815

URL: http://llvm.org/viewvc/llvm-project?rev=289815&view=rev
Log:
Restrict ranges of extension maps

To prevent copy statements from accessing arrays out of bounds, ranges of their
extension maps are restricted, according to the constraints of domains.

Reviewed-by: Michael Kruse <llvm at meinersbur.de>

Differential Revision: https://reviews.llvm.org/D25655

Added:
    polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
Modified:
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=289815&r1=289814&r2=289815&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Thu Dec 15 06:35:59 2016
@@ -851,6 +851,8 @@ createExtensionNode(__isl_take isl_sched
 static __isl_give isl_schedule_node *optimizeDataLayoutMatrMulPattern(
     __isl_take isl_schedule_node *Node, __isl_take isl_map *MapOldIndVar,
     MicroKernelParamsTy MicroParams, MacroKernelParamsTy MacroParams) {
+  // Check whether memory accesses of the SCoP statement correspond to
+  // the matrix multiplication pattern and if this is true, obtain them.
   auto InputDimsId = isl_map_get_tuple_id(MapOldIndVar, isl_dim_in);
   auto *Stmt = static_cast<ScopStmt *>(isl_id_get_user(InputDimsId));
   isl_id_free(InputDimsId);
@@ -860,6 +862,9 @@ static __isl_give isl_schedule_node *opt
     isl_map_free(MapOldIndVar);
     return Node;
   }
+
+  // Create a copy statement that corresponds to the memory access to the
+  // matrix B, the second operand of the matrix multiplication.
   Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
   Node = isl_schedule_node_parent(isl_schedule_node_parent(Node));
   Node = isl_schedule_node_parent(Node);
@@ -879,10 +884,19 @@ static __isl_give isl_schedule_node *opt
   isl_map_move_dims(ExtMap, isl_dim_in, 2, isl_dim_out, 0, 1);
   ExtMap = isl_map_project_out(ExtMap, isl_dim_in, 2, 1);
   auto *Domain = Stmt->getDomain();
+
+  // Restrict the domains of the copy statements to only execute when also its
+  // originating statement is executed.
+  auto *DomainId = isl_set_get_tuple_id(Domain);
   auto *NewStmt = Stmt->getParent()->addScopStmt(
       OldAcc, MemAccessB->getAccessRelation(), isl_set_copy(Domain));
+  ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, isl_id_copy(DomainId));
+  ExtMap = isl_map_intersect_range(ExtMap, isl_set_copy(Domain));
   ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, NewStmt->getDomainId());
   Node = createExtensionNode(Node, ExtMap);
+
+  // Create a copy statement that corresponds to the memory access
+  // to the matrix A, the first operand of the matrix multiplication.
   Node = isl_schedule_node_child(Node, 0);
   AccRel = getMatMulAccRel(MapOldIndVar, MacroParams.Kc, 4, 6);
   FirstDimSize = MacroParams.Mc * MacroParams.Kc / MicroParams.Mr;
@@ -896,7 +910,12 @@ static __isl_give isl_schedule_node *opt
   isl_map_move_dims(ExtMap, isl_dim_out, 0, isl_dim_in, 0, 1);
   isl_map_move_dims(ExtMap, isl_dim_in, 2, isl_dim_out, 0, 1);
   NewStmt = Stmt->getParent()->addScopStmt(
-      OldAcc, MemAccessA->getAccessRelation(), Domain);
+      OldAcc, MemAccessA->getAccessRelation(), isl_set_copy(Domain));
+
+  // Restrict the domains of the copy statements to only execute when also its
+  // originating statement is executed.
+  ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, DomainId);
+  ExtMap = isl_map_intersect_range(ExtMap, Domain);
   ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, NewStmt->getDomainId());
   Node = createExtensionNode(Node, ExtMap);
   Node = isl_schedule_node_child(isl_schedule_node_child(Node, 0), 0);

Added: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll?rev=289815&view=auto
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll (added)
+++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll Thu Dec 15 06:35:59 2016
@@ -0,0 +1,127 @@
+; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -polly-target-througput-vector-fma=1 -polly-target-latency-vector-fma=8 -polly-target-cache-level-associativity=8,8 -polly-target-cache-level-sizes=32768,262144 -polly-ast -analyze < %s | FileCheck %s
+;
+;    /* C := alpha*A*B + beta*C */
+;    /* _PB_NK % Kc != 0 */
+;    for (i = 0; i < _PB_NI; i++)
+;      for (j = 0; j < _PB_NJ; j++)
+;        {
+;	   C[i][j] *= beta;
+;	   for (k = 0; k < _PB_NK; ++k)
+;	     C[i][j] += alpha * A[i][k] * B[k][j];
+;        }
+;
+; CHECK:    {
+; CHECK-NEXT:      // 1st level tiling - Tiles
+; CHECK-NEXT:      for (int c0 = 0; c0 <= 32; c0 += 1)
+; CHECK-NEXT:        for (int c1 = 0; c1 <= 32; c1 += 1) {
+; CHECK-NEXT:          // 1st level tiling - Points
+; CHECK-NEXT:          for (int c2 = 0; c2 <= 31; c2 += 1)
+; CHECK-NEXT:            for (int c3 = 0; c3 <= 31; c3 += 1)
+; CHECK-NEXT:              Stmt_bb9(32 * c0 + c2, 32 * c1 + c3);
+; CHECK-NEXT:        }
+; CHECK-NEXT:      // 1st level tiling - Tiles
+; CHECK-NEXT:      for (int c0 = 0; c0 <= 65; c0 += 1)
+; CHECK-NEXT:        for (int c1 = 0; c1 <= 3; c1 += 1) {
+; CHECK-NEXT:          for (int c3 = 16 * c0; c3 <= 16 * c0 + 15; c3 += 1)
+; CHECK-NEXT:            for (int c4 = 256 * c1; c4 <= min(1022, 256 * c1 + 255); c4 += 1)
+; CHECK-NEXT:              CopyStmt_0(0, c3, c4);
+; CHECK-NEXT:          for (int c2 = 0; c2 <= 10; c2 += 1) {
+; CHECK-NEXT:            for (int c3 = 96 * c2; c3 <= 96 * c2 + 95; c3 += 1)
+; CHECK-NEXT:              for (int c5 = 256 * c1; c5 <= min(1022, 256 * c1 + 255); c5 += 1)
+; CHECK-NEXT:                CopyStmt_1(c3, 0, c5);
+; CHECK-NEXT:            // 1st level tiling - Points
+; CHECK-NEXT:            // Register tiling - Tiles
+; CHECK-NEXT:            for (int c3 = 0; c3 <= 1; c3 += 1)
+; CHECK-NEXT:              for (int c4 = 0; c4 <= 23; c4 += 1)
+; CHECK-NEXT:                for (int c5 = 0; c5 <= min(255, -256 * c1 + 1022); c5 += 1) {
+; CHECK-NEXT:                  // Register tiling - Points
+; CHECK-NEXT:                  // 1st level tiling - Tiles
+; CHECK-NEXT:                  // 1st level tiling - Points
+; CHECK-NEXT:                  {
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                  }
+; CHECK-NEXT:                }
+; CHECK-NEXT:          }
+; CHECK-NEXT:        }
+; CHECK-NEXT:    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1023 x double]* %arg6, [1056 x double]* %arg7) #0 {
+bb:
+  br label %bb8
+
+bb8:                                              ; preds = %bb29, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp30, %bb29 ]
+  br label %bb9
+
+bb9:                                              ; preds = %bb26, %bb8
+  %tmp10 = phi i64 [ 0, %bb8 ], [ %tmp27, %bb26 ]
+  %tmp11 = getelementptr inbounds [1056 x double], [1056 x double]* %arg5, i64 %tmp, i64 %tmp10
+  %tmp12 = load double, double* %tmp11, align 8
+  %tmp13 = fmul double %tmp12, %arg4
+  store double %tmp13, double* %tmp11, align 8
+  br label %Copy_0
+
+Copy_0:                                             ; preds = %Copy_0, %bb9
+  %tmp15 = phi i64 [ 0, %bb9 ], [ %tmp24, %Copy_0 ]
+  %tmp16 = getelementptr inbounds [1023 x double], [1023 x double]* %arg6, i64 %tmp, i64 %tmp15
+  %tmp17 = load double, double* %tmp16, align 8
+  %tmp18 = fmul double %tmp17, %arg3
+  %tmp19 = getelementptr inbounds [1056 x double], [1056 x double]* %arg7, i64 %tmp15, i64 %tmp10
+  %tmp20 = load double, double* %tmp19, align 8
+  %tmp21 = fmul double %tmp18, %tmp20
+  %tmp22 = load double, double* %tmp11, align 8
+  %tmp23 = fadd double %tmp22, %tmp21
+  store double %tmp23, double* %tmp11, align 8
+  %tmp24 = add nuw nsw i64 %tmp15, 1
+  %tmp25 = icmp ne i64 %tmp24, 1023
+  br i1 %tmp25, label %Copy_0, label %bb26
+
+bb26:                                             ; preds = %Copy_0
+  %tmp27 = add nuw nsw i64 %tmp10, 1
+  %tmp28 = icmp ne i64 %tmp27, 1056
+  br i1 %tmp28, label %bb9, label %bb29
+
+bb29:                                             ; preds = %bb26
+  %tmp30 = add nuw nsw i64 %tmp, 1
+  %tmp31 = icmp ne i64 %tmp30, 1056
+  br i1 %tmp31, label %bb8, label %bb32
+
+bb32:                                             ; preds = %bb29
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "target-cpu"="x86-64" "target-features"="+aes,+avx,+cmov,+cx16,+fxsr,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" }




More information about the llvm-commits mailing list