[polly] r312929 - Unroll and separate the remaining parts of isolation

Roman Gareev via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 10:46:47 PDT 2017


Author: romangareev
Date: Mon Sep 11 10:46:47 2017
New Revision: 312929

URL: http://llvm.org/viewvc/llvm-project?rev=312929&view=rev
Log:
Unroll and separate the remaining parts of isolation

The remaining parts produced by the full partial tile isolation can contain
hot spots that are worth to be optimized. Currently, we rely on the simple
loop unrolling pass, LiCM and the SLP vectorizer to optimize such parts.
However, the approach can suffer from the lack of the information about
aliasing that Polly provides using additional alias metadata or/and the lack
of the information required by simple loop unrolling pass.

This patch is the first step to optimize the remaining parts. To do it, we
unroll and separate them. In case of, for instance, Intel Kaby Lake, it helps
to increase the performance of the generated code from 39.87 GFlop/s to
49.23 GFlop/s.

The next possible step is to avoid unrolling performed by Polly in case of
isolated and remaining parts and rely only on simple loop unrolling pass and
the Loop vectorizer.

Reviewed-by: Tobias Grosser <tobias at grosser.es>

Differential Revision: https://reviews.llvm.org/D37692

Modified:
    polly/trunk/lib/Transform/ScheduleOptimizer.cpp
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
    polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll

Modified: polly/trunk/lib/Transform/ScheduleOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Transform/ScheduleOptimizer.cpp?rev=312929&r1=312928&r2=312929&view=diff
==============================================================================
--- polly/trunk/lib/Transform/ScheduleOptimizer.cpp (original)
+++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp Mon Sep 11 10:46:47 2017
@@ -323,19 +323,20 @@ static isl::union_set getIsolateOptions(
   return isl::union_set(IsolateOption);
 }
 
-/// Create an isl::union_set, which describes the atomic option for the
+namespace {
+/// Create an isl::union_set, which describes the specified option for the
 /// dimension of the current node.
 ///
-/// It may help to reduce the size of generated code.
-///
-/// @param Ctx An isl::ctx, which is used to create the isl::union_set.
-static isl::union_set getAtomicOptions(isl::ctx Ctx) {
+/// @param Ctx    An isl::ctx, which is used to create the isl::union_set.
+/// @param Option The name of the option.
+isl::union_set getDimOptions(isl::ctx Ctx, const char *Option) {
   isl::space Space(Ctx, 0, 1);
-  isl::set AtomicOption = isl::set::universe(Space);
-  isl::id Id = isl::id::alloc(Ctx, "atomic", nullptr);
-  AtomicOption = AtomicOption.set_tuple_id(Id);
-  return isl::union_set(AtomicOption);
+  auto DimOption = isl::set::universe(Space);
+  auto Id = isl::id::alloc(Ctx, Option, nullptr);
+  DimOption = DimOption.set_tuple_id(Id);
+  return isl::union_set(DimOption);
 }
+} // namespace
 
 /// Create an isl::union_set, which describes the option of the form
 /// [isolate[] -> unroll[x]].
@@ -391,7 +392,7 @@ ScheduleTreeOptimizer::isolateFullPartia
   isl::map ScheduleRelation = isl::map::from_union_map(SchedRelUMap);
   isl::set ScheduleRange = ScheduleRelation.range();
   isl::set IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth);
-  isl::union_set AtomicOption = getAtomicOptions(IsolateDomain.get_ctx());
+  auto AtomicOption = getDimOptions(IsolateDomain.get_ctx(), "atomic");
   isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, 1);
   Node = Node.parent().parent();
   isl::union_set Options = IsolateOption.unite(AtomicOption);
@@ -1207,13 +1208,12 @@ isolateAndUnrollMatMulInnerLoops(isl::sc
   isl::union_set IsolateOption =
       getIsolateOptions(Prefix.add_dims(isl::dim::set, 3), 3);
   isl::ctx Ctx = Node.get_ctx();
-  isl::union_set AtomicOption = getAtomicOptions(Ctx);
-  isl::union_set Options = IsolateOption.unite(AtomicOption);
+  auto Options = IsolateOption.unite(getDimOptions(Ctx, "unroll"));
   Options = Options.unite(getUnrollIsolatedSetOptions(Ctx));
   Node = Node.band_set_ast_build_options(Options);
   Node = Node.parent().parent().parent();
   IsolateOption = getIsolateOptions(Prefix, 3);
-  Options = IsolateOption.unite(AtomicOption);
+  Options = IsolateOption.unite(getDimOptions(Ctx, "separate"));
   Node = Node.band_set_ast_build_options(Options);
   Node = Node.child(0).child(0).child(0);
   return Node;

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll?rev=312929&r1=312928&r2=312929&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll Mon Sep 11 10:46:47 2017
@@ -287,26 +287,378 @@
 ; CHECK-NEXT:                  Stmt_for_body6(384 * c2 + 8 * c4 + 7, 32 * c3 + 31, 512 * c1 + c5);
 ; CHECK-NEXT:                }
 ; CHECK-NEXT:              }
-; CHECK-NEXT:            if (c2 == 2)
-; CHECK-NEXT:              for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) {
-; CHECK-NEXT:                // Loop Vectorizer Disabled
-; CHECK-NEXT:                // Register tiling - Points
-; CHECK-NEXT:                for (int c6 = 0; c6 <= 3; c6 += 1)
-; CHECK-NEXT:                  for (int c7 = 0; c7 <= 31; c7 += 1)
-; CHECK-NEXT:                    Stmt_for_body6(c6 + 1016, 32 * c3 + c7, 512 * c1 + c5);
+; CHECK-NEXT:                if (c2 == 2)
+; CHECK-NEXT:                  for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) {
+; CHECK-NEXT:                    // Loop Vectorizer Disabled
+; CHECK-NEXT:                    // Register tiling - Points
+; CHECK-NEXT:                    {
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 1, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 2, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 4, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 5, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 6, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 7, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 8, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 9, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 10, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 11, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 12, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 13, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 14, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 15, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 16, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 17, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 18, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 19, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 20, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 21, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 22, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 23, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 24, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 25, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 26, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 27, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 28, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 29, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 30, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1016, 32 * c3 + 31, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 1, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 2, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 4, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 5, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 6, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 7, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 8, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 9, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 10, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 11, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 12, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 13, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 14, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 15, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 16, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 17, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 18, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 19, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 20, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 21, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 22, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 23, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 24, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 25, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 26, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 27, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 28, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 29, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 30, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1017, 32 * c3 + 31, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 1, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 2, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 4, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 5, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 6, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 7, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 8, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 9, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 10, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 11, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 12, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 13, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 14, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 15, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 16, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 17, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 18, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 19, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 20, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 21, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 22, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 23, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 24, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 25, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 26, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 27, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 28, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 29, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 30, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1018, 32 * c3 + 31, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 1, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 2, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 3, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 4, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 5, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 6, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 7, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 8, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 9, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 10, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 11, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 12, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 13, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 14, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 15, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 16, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 17, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 18, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 19, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 20, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 21, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 22, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 23, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 24, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 25, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 26, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 27, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 28, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 29, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 30, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(1019, 32 * c3 + 31, 512 * c1 + c5);
+; CHECK-NEXT:                    }
+; CHECK-NEXT:                  }
 ; CHECK-NEXT:              }
-; CHECK-NEXT:          }
-; CHECK-NEXT:          for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1)
-; CHECK-NEXT:            for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) {
-; CHECK-NEXT:              // Loop Vectorizer Disabled
-; CHECK-NEXT:              // Register tiling - Points
-; CHECK-NEXT:              for (int c6 = 0; c6 <= min(7, -384 * c2 - 8 * c4 + 1019); c6 += 1)
-; CHECK-NEXT:                for (int c7 = 0; c7 <= 27; c7 += 1)
-; CHECK-NEXT:                  Stmt_for_body6(384 * c2 + 8 * c4 + c6, c7 + 992, 512 * c1 + c5);
+; CHECK-NEXT:              for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1)
+; CHECK-NEXT:                for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) {
+; CHECK-NEXT:                  // Loop Vectorizer Disabled
+; CHECK-NEXT:                  // Register tiling - Points
+; CHECK-NEXT:                  {
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 992, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 993, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 994, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 995, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 996, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 997, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 998, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 999, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 992, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 993, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 994, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 995, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 996, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 997, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 998, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 999, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 992, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 993, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 994, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 995, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 996, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 997, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 998, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 999, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 992, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 993, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 994, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 995, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 996, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 997, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 998, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 999, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                    if (48 * c2 + c4 <= 126) {
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 992, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 993, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 994, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 995, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 996, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 997, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 998, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 999, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 992, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 993, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 994, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 995, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 996, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 997, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 998, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 999, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 992, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 993, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 994, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 995, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 996, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 997, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 998, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 999, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 992, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 993, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 994, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 995, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 996, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 997, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 998, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 999, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1000, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1001, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1002, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1003, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1004, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1005, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1006, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1007, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1008, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1009, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1010, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1011, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1012, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1013, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1014, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1015, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1016, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1017, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1018, 512 * c1 + c5);
+; CHECK-NEXT:                      Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1019, 512 * c1 + c5);
+; CHECK-NEXT:                    }
+; CHECK-NEXT:                  }
+; CHECK-NEXT:                }
 ; CHECK-NEXT:            }
+; CHECK-NEXT:          }
 ; CHECK-NEXT:        }
-; CHECK-NEXT:      }
-; CHECK-NEXT:    }
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll?rev=312929&r1=312928&r2=312929&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll Mon Sep 11 10:46:47 2017
@@ -11,70 +11,79 @@
 ; Test whether isolation works as expected.
 ;
 ; CHECK:   // Inter iteration alias-free
-; CHECK-NEXT:    // 1st level tiling - Tiles
-; CHECK-NEXT:    for (int c0 = 0; c0 <= 1; c0 += 1)
-; CHECK-NEXT:      for (int c1 = 0; c1 <= 6; c1 += 1) {
-; CHECK-NEXT:        for (int c3 = 1536 * c0; c3 <= min(1999, 1536 * c0 + 1535); c3 += 1)
-; CHECK-NEXT:          for (int c4 = 307 * c1; c4 <= min(1999, 307 * c1 + 306); c4 += 1)
-; CHECK-NEXT:            CopyStmt_0(0, c3, c4);
-; CHECK-NEXT:        for (int c2 = 0; c2 <= 24; c2 += 1) {
-; CHECK-NEXT:          if (c0 == 0)
-; CHECK-NEXT:            for (int c3 = 80 * c2; c3 <= 80 * c2 + 79; c3 += 1)
-; CHECK-NEXT:              for (int c5 = 307 * c1; c5 <= min(1999, 307 * c1 + 306); c5 += 1)
-; CHECK-NEXT:                CopyStmt_1(c3, 0, c5);
-; CHECK-NEXT:          // 1st level tiling - Points
-; CHECK-NEXT:          // Register tiling - Tiles
-; CHECK-NEXT:          {
-; CHECK-NEXT:            for (int c3 = 0; c3 <= min(255, -256 * c0 + 332); c3 += 1)
-; CHECK-NEXT:              for (int c4 = 0; c4 <= 15; c4 += 1)
-; CHECK-NEXT:                for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) {
-; CHECK-NEXT:                  // Loop Vectorizer Disabled
-; CHECK-NEXT:                  // Register tiling - Points
-; CHECK-NEXT:                  {
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
-; CHECK-NEXT:                    Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
-; CHECK-NEXT:                  }
+; CHECK-NEXT:          // 1st level tiling - Tiles
+; CHECK-NEXT:          for (int c0 = 0; c0 <= 1; c0 += 1)
+; CHECK-NEXT:            for (int c1 = 0; c1 <= 6; c1 += 1) {
+; CHECK-NEXT:              for (int c3 = 1536 * c0; c3 <= min(1999, 1536 * c0 + 1535); c3 += 1)
+; CHECK-NEXT:                for (int c4 = 307 * c1; c4 <= min(1999, 307 * c1 + 306); c4 += 1)
+; CHECK-NEXT:                  CopyStmt_0(0, c3, c4);
+; CHECK-NEXT:              for (int c2 = 0; c2 <= 24; c2 += 1) {
+; CHECK-NEXT:                if (c0 == 0)
+; CHECK-NEXT:                  for (int c3 = 80 * c2; c3 <= 80 * c2 + 79; c3 += 1)
+; CHECK-NEXT:                    for (int c5 = 307 * c1; c5 <= min(1999, 307 * c1 + 306); c5 += 1)
+; CHECK-NEXT:                      CopyStmt_1(c3, 0, c5);
+; CHECK-NEXT:                // 1st level tiling - Points
+; CHECK-NEXT:                // Register tiling - Tiles
+; CHECK-NEXT:                {
+; CHECK-NEXT:                  for (int c3 = 0; c3 <= min(255, -256 * c0 + 332); c3 += 1)
+; CHECK-NEXT:                    for (int c4 = 0; c4 <= 15; c4 += 1)
+; CHECK-NEXT:                      for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) {
+; CHECK-NEXT:                        // Loop Vectorizer Disabled
+; CHECK-NEXT:                        // Register tiling - Points
+; CHECK-NEXT:                        {
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5);
+; CHECK-NEXT:                        }
+; CHECK-NEXT:                      }
+; CHECK-NEXT:                  if (c0 == 1)
+; CHECK-NEXT:                    for (int c4 = 0; c4 <= 15; c4 += 1)
+; CHECK-NEXT:                      for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) {
+; CHECK-NEXT:                        // Loop Vectorizer Disabled
+; CHECK-NEXT:                        // Register tiling - Points
+; CHECK-NEXT:                        {
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1998, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4, 1999, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1998, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1999, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1998, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1999, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1998, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1999, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1998, 307 * c1 + c5);
+; CHECK-NEXT:                          Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1999, 307 * c1 + c5);
+; CHECK-NEXT:                        }
+; CHECK-NEXT:                      }
 ; CHECK-NEXT:                }
-; CHECK-NEXT:            if (c0 == 1)
-; CHECK-NEXT:              for (int c4 = 0; c4 <= 15; c4 += 1)
-; CHECK-NEXT:                for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) {
-; CHECK-NEXT:                  // Loop Vectorizer Disabled
-; CHECK-NEXT:                  // Register tiling - Points
-; CHECK-NEXT:                  for (int c6 = 0; c6 <= 4; c6 += 1)
-; CHECK-NEXT:                    for (int c7 = 0; c7 <= 1; c7 += 1)
-; CHECK-NEXT:                      Stmt_for_body6(80 * c2 + 5 * c4 + c6, c7 + 1998, 307 * c1 + c5);
-; CHECK-NEXT:                }
-; CHECK-NEXT:          }
-; CHECK-NEXT:        }
-; CHECK-NEXT:      }
+; CHECK-NEXT:              }
+; CHECK-NEXT:            }
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll?rev=312929&r1=312928&r2=312929&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll Mon Sep 11 10:46:47 2017
@@ -36,88 +36,267 @@
 ;	 for (k = 0; k < _PB_NK; ++k)
 ;	   C[i][j] += A[i][k] * B[k][j];
 ;
-; CHECK:    if (ni >= 1) {
-; CHECK-NEXT:      // Inter iteration alias-free
-; CHECK-NEXT:      // 1st level tiling - Tiles
-; CHECK-NEXT:      for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
-; CHECK-NEXT:        for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {
-; CHECK-NEXT:          for (int c3 = 2048 * c0; c3 <= min(nj - 1, 2048 * c0 + 2047); c3 += 1)
-; CHECK-NEXT:            for (int c4 = 256 * c1; c4 <= min(nk - 1, 256 * c1 + 255); c4 += 1)
-; CHECK-NEXT:              CopyStmt_0(0, c3, c4);
-; CHECK-NEXT:          for (int c2 = 0; c2 <= floord(ni - 1, 96); c2 += 1) {
-; CHECK-NEXT:            if (c0 == 0)
-; CHECK-NEXT:              for (int c3 = 96 * c2; c3 <= min(ni - 1, 96 * c2 + 95); c3 += 1)
-; CHECK-NEXT:                for (int c5 = 256 * c1; c5 <= min(nk - 1, 256 * c1 + 255); c5 += 1)
-; CHECK-NEXT:                  CopyStmt_1(c3, 0, c5);
-; CHECK-NEXT:            // 1st level tiling - Points
-; CHECK-NEXT:            // Register tiling - Tiles
-; CHECK-NEXT:            {
-; CHECK-NEXT:              if (ni >= 96 * c2 + 4)
-; CHECK-NEXT:                for (int c3 = 0; c3 <= min(255, -256 * c0 + nj / 8 - 1); c3 += 1) {
-; CHECK-NEXT:                  for (int c4 = 0; c4 <= min(23, -24 * c2 + ni / 4 - 1); c4 += 1)
-; CHECK-NEXT:                    for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
-; CHECK-NEXT:                      // Loop Vectorizer Disabled
-; CHECK-NEXT:                      // Register tiling - Points
-; CHECK-NEXT:                      {
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
-; CHECK-NEXT:                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK:          if (ni >= 1) {
+; CHECK-NEXT:            // Inter iteration alias-free
+; CHECK-NEXT:            // 1st level tiling - Tiles
+; CHECK-NEXT:            for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1)
+; CHECK-NEXT:              for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) {
+; CHECK-NEXT:                for (int c3 = 2048 * c0; c3 <= min(nj - 1, 2048 * c0 + 2047); c3 += 1)
+; CHECK-NEXT:                  for (int c4 = 256 * c1; c4 <= min(nk - 1, 256 * c1 + 255); c4 += 1)
+; CHECK-NEXT:                    CopyStmt_0(0, c3, c4);
+; CHECK-NEXT:                for (int c2 = 0; c2 <= floord(ni - 1, 96); c2 += 1) {
+; CHECK-NEXT:                  if (c0 == 0)
+; CHECK-NEXT:                    for (int c3 = 96 * c2; c3 <= min(ni - 1, 96 * c2 + 95); c3 += 1)
+; CHECK-NEXT:                      for (int c5 = 256 * c1; c5 <= min(nk - 1, 256 * c1 + 255); c5 += 1)
+; CHECK-NEXT:                        CopyStmt_1(c3, 0, c5);
+; CHECK-NEXT:                  // 1st level tiling - Points
+; CHECK-NEXT:                  // Register tiling - Tiles
+; CHECK-NEXT:                  {
+; CHECK-NEXT:                    if (ni >= 96 * c2 + 4)
+; CHECK-NEXT:                      for (int c3 = 0; c3 <= min(255, -256 * c0 + nj / 8 - 1); c3 += 1) {
+; CHECK-NEXT:                        for (int c4 = 0; c4 <= min(23, -24 * c2 + ni / 4 - 1); c4 += 1)
+; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                            // Loop Vectorizer Disabled
+; CHECK-NEXT:                            // Register tiling - Points
+; CHECK-NEXT:                            {
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          }
+; CHECK-NEXT:                        if (96 * c2 + 95 >= ni && ni % 4 >= 1)
+; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                            // Loop Vectorizer Disabled
+; CHECK-NEXT:                            // Register tiling - Points
+; CHECK-NEXT:                            {
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                              Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                              if (ni % 4 >= 2) {
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                if ((ni + 1) % 4 == 0) {
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          }
+; CHECK-NEXT:                      }
+; CHECK-NEXT:                    if (96 * c2 + 3 >= ni || (2048 * c0 + 2047 >= nj && nj % 8 >= 1)) {
+; CHECK-NEXT:                      if (96 * c2 + 3 >= ni) {
+; CHECK-NEXT:                        for (int c3 = 0; c3 <= min(255, -256 * c0 + (nj - 1) / 8); c3 += 1)
+; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                            // Loop Vectorizer Disabled
+; CHECK-NEXT:                            // Register tiling - Points
+; CHECK-NEXT:                            {
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                              if (nj >= 2048 * c0 + 8 * c3 + 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj >= 2048 * c0 + 8 * c3 + 3) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 8 * c3 + 4) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 8 * c3 + 5) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 8 * c3 + 6) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 8 * c3 + 7) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj >= 2048 * c0 + 8 * c3 + 8)
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                              if (ni >= 96 * c2 + 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj >= 2048 * c0 + 8 * c3 + 2) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 8 * c3 + 3) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 8 * c3 + 4) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 8 * c3 + 5) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 8 * c3 + 6) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj >= 2048 * c0 + 8 * c3 + 7) {
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                            if (nj >= 2048 * c0 + 8 * c3 + 8)
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                                if (96 * c2 + 3 == ni) {
+; CHECK-NEXT:                                  Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj >= 2048 * c0 + 8 * c3 + 2) {
+; CHECK-NEXT:                                    Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj >= 2048 * c0 + 8 * c3 + 3) {
+; CHECK-NEXT:                                      Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj >= 2048 * c0 + 8 * c3 + 4) {
+; CHECK-NEXT:                                        Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj >= 2048 * c0 + 8 * c3 + 5) {
+; CHECK-NEXT:                                          Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj >= 2048 * c0 + 8 * c3 + 6) {
+; CHECK-NEXT:                                            Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                            if (nj >= 2048 * c0 + 8 * c3 + 7) {
+; CHECK-NEXT:                                              Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5);
+; CHECK-NEXT:                                              if (nj >= 2048 * c0 + 8 * c3 + 8)
+; CHECK-NEXT:                                                Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5);
+; CHECK-NEXT:                                            }
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          }
+; CHECK-NEXT:                      } else {
+; CHECK-NEXT:                        for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1)
+; CHECK-NEXT:                          for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
+; CHECK-NEXT:                            // Loop Vectorizer Disabled
+; CHECK-NEXT:                            // Register tiling - Points
+; CHECK-NEXT:                            {
+; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                              if (nj % 8 >= 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj % 8 >= 3) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj % 8 >= 4) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj % 8 >= 5) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj % 8 >= 6) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                        if ((nj + 1) % 8 == 0)
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                              if (ni >= 96 * c2 + 4 * c4 + 2) {
+; CHECK-NEXT:                                Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                if (nj % 8 >= 2) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj % 8 >= 3) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj % 8 >= 4) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj % 8 >= 5) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj % 8 >= 6) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                          if ((nj + 1) % 8 == 0)
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 1, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                                if (ni >= 96 * c2 + 4 * c4 + 3) {
+; CHECK-NEXT:                                  Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                  if (nj % 8 >= 2) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj % 8 >= 3) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj % 8 >= 4) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj % 8 >= 5) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj % 8 >= 6) {
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                            if ((nj + 1) % 8 == 0)
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 2, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                  if (ni >= 96 * c2 + 4 * c4 + 4) {
+; CHECK-NEXT:                                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj, 256 * c1 + c5);
+; CHECK-NEXT:                                    if (nj % 8 >= 2) {
+; CHECK-NEXT:                                      Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 1, 256 * c1 + c5);
+; CHECK-NEXT:                                      if (nj % 8 >= 3) {
+; CHECK-NEXT:                                        Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 2, 256 * c1 + c5);
+; CHECK-NEXT:                                        if (nj % 8 >= 4) {
+; CHECK-NEXT:                                          Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 3, 256 * c1 + c5);
+; CHECK-NEXT:                                          if (nj % 8 >= 5) {
+; CHECK-NEXT:                                            Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 4, 256 * c1 + c5);
+; CHECK-NEXT:                                            if (nj % 8 >= 6) {
+; CHECK-NEXT:                                              Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 5, 256 * c1 + c5);
+; CHECK-NEXT:                                              if ((nj + 1) % 8 == 0)
+; CHECK-NEXT:                                                Stmt_for_body6(96 * c2 + 4 * c4 + 3, nj - 1, 256 * c1 + c5);
+; CHECK-NEXT:                                            }
+; CHECK-NEXT:                                          }
+; CHECK-NEXT:                                        }
+; CHECK-NEXT:                                      }
+; CHECK-NEXT:                                    }
+; CHECK-NEXT:                                  }
+; CHECK-NEXT:                                }
+; CHECK-NEXT:                              }
+; CHECK-NEXT:                            }
+; CHECK-NEXT:                          }
 ; CHECK-NEXT:                      }
 ; CHECK-NEXT:                    }
-; CHECK-NEXT:                  if (96 * c2 + 95 >= ni)
-; CHECK-NEXT:                    for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
-; CHECK-NEXT:                      // Loop Vectorizer Disabled
-; CHECK-NEXT:                      // Register tiling - Points
-; CHECK-NEXT:                      for (int c6 = 0; c6 < ni % 4; c6 += 1)
-; CHECK-NEXT:                        for (int c7 = 0; c7 <= 7; c7 += 1)
-; CHECK-NEXT:                          Stmt_for_body6(-((ni + 4) % 4) + ni + c6, 2048 * c0 + 8 * c3 + c7, 256 * c1 + c5);
-; CHECK-NEXT:                    }
+; CHECK-NEXT:                  }
 ; CHECK-NEXT:                }
-; CHECK-NEXT:              if (96 * c2 + 3 >= ni || (2048 * c0 + 2047 >= nj && nj % 8 >= 1))
-; CHECK-NEXT:                for (int c3 = 0; c3 <= min(255, -256 * c0 + (nj - 1) / 8); c3 += 1)
-; CHECK-NEXT:                  if (96 * c2 + 3 >= ni || 2048 * c0 + 8 * c3 + 7 >= nj)
-; CHECK-NEXT:                    for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1)
-; CHECK-NEXT:                      if ((ni >= 96 * c2 + 4 && 2048 * c0 + 8 * c3 + 7 >= nj) || 1)
-; CHECK-NEXT:                        for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) {
-; CHECK-NEXT:                          // Loop Vectorizer Disabled
-; CHECK-NEXT:                          // Register tiling - Points
-; CHECK-NEXT:                          for (int c6 = 0; c6 <= min(3, ni - 96 * c2 - 4 * c4 - 1); c6 += 1)
-; CHECK-NEXT:                            for (int c7 = 0; c7 <= min(7, nj - 2048 * c0 - 8 * c3 - 1); c7 += 1)
-; CHECK-NEXT:                              Stmt_for_body6(96 * c2 + 4 * c4 + c6, 2048 * c0 + 8 * c3 + c7, 256 * c1 + c5);
-; CHECK-NEXT:                        }
-; CHECK-NEXT:            }
+; CHECK-NEXT:              }
 ; CHECK-NEXT:          }
-; CHECK-NEXT:        }
-; CHECK-NEXT:    }
 ;
 
 ; AUTO-VECTORIZATION:  fmul <4 x double>

Modified: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll?rev=312929&r1=312928&r2=312929&view=diff
==============================================================================
--- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll (original)
+++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll Mon Sep 11 10:46:47 2017
@@ -90,17 +90,32 @@
 ; CHECK-NEXT:                  Stmt_for_body6(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5);
 ; CHECK-NEXT:                }
 ; CHECK-NEXT:              }
-; CHECK-NEXT:          for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1)
-; CHECK-NEXT:            for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) {
-; CHECK-NEXT:              // Loop Vectorizer Disabled
-; CHECK-NEXT:              // Register tiling - Points
-; CHECK-NEXT:              for (int c6 = 0; c6 <= 3; c6 += 1)
-; CHECK-NEXT:                for (int c7 = 0; c7 <= 3; c7 += 1)
-; CHECK-NEXT:                  Stmt_for_body6(96 * c2 + 4 * c4 + c6, c7 + 1016, 256 * c1 + c5);
+; CHECK-NEXT:              for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1)
+; CHECK-NEXT:                for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) {
+; CHECK-NEXT:                  // Loop Vectorizer Disabled
+; CHECK-NEXT:                  // Register tiling - Points
+; CHECK-NEXT:                  {
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4, 1016, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4, 1017, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4, 1018, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4, 1019, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1016, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1017, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1018, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1019, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1016, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1017, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1018, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1019, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1016, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1017, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1018, 256 * c1 + c5);
+; CHECK-NEXT:                    Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1019, 256 * c1 + c5);
+; CHECK-NEXT:                  }
+; CHECK-NEXT:                }
 ; CHECK-NEXT:            }
+; CHECK-NEXT:          }
 ; CHECK-NEXT:        }
-; CHECK-NEXT:      }
-; CHECK-NEXT:    }
 ;
 ; AUTO-VECTORIZATION:  fmul <4 x double>
 ; AUTO-VECTORIZATION:  fadd <4 x double>




More information about the llvm-commits mailing list