[polly] r284848 - [SCEVAffinator] Make precise modular math more correct.

Fri Oct 21 11:08:03 PDT 2016

Author: efriedma
Date: Fri Oct 21 13:08:02 2016
New Revision: 284848

URL: http://llvm.org/viewvc/llvm-project?rev=284848&view=rev
Log:
[SCEVAffinator] Make precise modular math more correct.

Integer math in LLVM IR is modular. Integer math in isl is
arbitrary-precision. Modeling LLVM IR math correctly in isl requires
either adding assumptions that math doesn't actually overflow, or
explicitly wrapping the math. However, expressions with the "nsw" flag
are special; we can pretend they're arbitrary-precision because it's
undefined behavior if the result wraps. SCEV expressions based on IR
instructions with an nsw flag also carry an nsw flag (roughly; actually,
the real rule is a bit more complicated, but the details don't matter
here).

Before this patch, SCEV flags were also overloaded with an additional
function: the ZExt code was mutating SCEV expressions as a hack to
indicate to checkForWrapping that we don't need to add assumptions to
the operand of a ZExt; it'll add explicit wrapping itself. This kind of
works... the problem is that if anything else ever touches that SCEV
expression, it'll get confused by the incorrect flags.

Instead, with this patch, we make the decision about whether to
explicitly wrap the math a bit earlier, basing the decision purely on
the SCEV expression itself, and not its users.

Differential Revision: https://reviews.llvm.org/D25287


Modified:
    polly/trunk/include/polly/Support/SCEVAffinator.h
    polly/trunk/lib/Support/SCEVAffinator.cpp
    polly/trunk/test/ScopInfo/infeasible-rtc.ll
    polly/trunk/test/ScopInfo/integers.ll
    polly/trunk/test/ScopInfo/truncate-1.ll
    polly/trunk/test/ScopInfo/truncate-2.ll
    polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll

Modified: polly/trunk/include/polly/Support/SCEVAffinator.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/Support/SCEVAffinator.h?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================

--- polly/trunk/include/polly/Support/SCEVAffinator.h (original)
+++ polly/trunk/include/polly/Support/SCEVAffinator.h Fri Oct 21 13:08:02 2016
@@ -113,6 +113,10 @@ private:
   /// @returns The isl representation @p PWAC with a posisbly adjusted domain.
   __isl_give PWACtx checkForWrapping(const llvm::SCEV *Expr, PWACtx PWAC) const;
 
+  /// Whether to track the value of this expression precisely, rather than
+  /// assuming it won't wrap.
+  bool computeModuloForExpr(const llvm::SCEV *Expr);
+
   __isl_give PWACtx visit(const llvm::SCEV *E);
   __isl_give PWACtx visitConstant(const llvm::SCEVConstant *E);
   __isl_give PWACtx visitTruncateExpr(const llvm::SCEVTruncateExpr *E);

Modified: polly/trunk/lib/Support/SCEVAffinator.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Support/SCEVAffinator.cpp?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/lib/Support/SCEVAffinator.cpp (original)
+++ polly/trunk/lib/Support/SCEVAffinator.cpp Fri Oct 21 13:08:02 2016
@@ -36,21 +36,9 @@ static cl::opt<bool> IgnoreIntegerWrappi
 // compile time.
 static int const MaxDisjunctionsInPwAff = 100;
 
-// The maximal number of bits for which a zero-extend is modeled precisely.
-static unsigned const MaxZextSmallBitWidth = 7;
-
-// The maximal number of bits for which a truncate is modeled precisely.
-static unsigned const MaxTruncateSmallBitWidth = 31;
-
-/// Return true if a zero-extend from @p Width bits is precisely modeled.
-static bool isPreciseZeroExtend(unsigned Width) {
-  return Width <= MaxZextSmallBitWidth;
-}
-
-/// Return true if a truncate from @p Width bits is precisely modeled.
-static bool isPreciseTruncate(unsigned Width) {
-  return Width <= MaxTruncateSmallBitWidth;
-}
+// The maximal number of bits for which a general expression is modeled
+// precisely.
+static unsigned const MaxSmallBitWidth = 7;
 
 /// Add the number of basic sets in @p Domain to @p User
 static isl_stat addNumBasicSets(__isl_take isl_set *Domain,
@@ -99,26 +87,6 @@ static void combine(__isl_keep PWACtx &P
   PWAC0.second = isl_set_union(PWAC0.second, PWAC1.second);
 }
 
-/// Set the possible wrapping of @p Expr to @p Flags.
-static const SCEV *setNoWrapFlags(ScalarEvolution &SE, const SCEV *Expr,
-                                  SCEV::NoWrapFlags Flags) {
-  auto *NAry = dyn_cast<SCEVNAryExpr>(Expr);
-  if (!NAry)
-    return Expr;
-
-  SmallVector<const SCEV *, 8> Ops(NAry->op_begin(), NAry->op_end());
-  switch (Expr->getSCEVType()) {
-  case scAddExpr:
-    return SE.getAddExpr(Ops, Flags);
-  case scMulExpr:
-    return SE.getMulExpr(Ops, Flags);
-  case scAddRecExpr:
-    return SE.getAddRecExpr(Ops, cast<SCEVAddRecExpr>(Expr)->getLoop(), Flags);
-  default:
-    return Expr;
-  }
-}
-
 static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width,
                                                      __isl_take isl_set *Dom) {
   auto *Ctx = isl_set_get_ctx(Dom);
@@ -241,6 +209,15 @@ bool SCEVAffinator::hasNSWAddRecForLoop(
   return false;
 }
 
+bool SCEVAffinator::computeModuloForExpr(const SCEV *Expr) {
+  unsigned Width = TD.getTypeSizeInBits(Expr->getType());
+  // We assume nsw expressions never overflow.
+  if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr))
+    if (NAry->getNoWrapFlags() & SCEV::FlagNSW)
+      return false;
+  return Width <= MaxSmallBitWidth;
+}
+
 __isl_give PWACtx SCEVAffinator::visit(const SCEV *Expr) {
 
   auto Key = std::make_pair(Expr, BB);
@@ -267,16 +244,23 @@ __isl_give PWACtx SCEVAffinator::visit(c
     PWAC = getPWACtxFromPWA(isl_pw_aff_alloc(Domain, Affine));
   } else {
     PWAC = SCEVVisitor<SCEVAffinator, PWACtx>::visit(Expr);
-    PWAC = checkForWrapping(Expr, PWAC);
+    if (computeModuloForExpr(Expr))
+      PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
+    else
+      PWAC = checkForWrapping(Expr, PWAC);
   }
 
-  if (!Factor->getType()->isIntegerTy(1))
+  if (!Factor->getType()->isIntegerTy(1)) {
     combine(PWAC, visitConstant(Factor), isl_pw_aff_mul);
+    if (computeModuloForExpr(Key.first))
+      PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
+  }
 
   // For compile time reasons we need to simplify the PWAC before we cache and
   // return it.
   PWAC.first = isl_pw_aff_coalesce(PWAC.first);
-  PWAC = checkForWrapping(Key.first, PWAC);
+  if (!computeModuloForExpr(Key.first))
+    PWAC = checkForWrapping(Key.first, PWAC);
 
   CachedExpressions[Key] = copyPWACtx(PWAC);
   return PWAC;
@@ -314,12 +298,9 @@ SCEVAffinator::visitTruncateExpr(const S
   auto OpPWAC = visit(Op);
 
   unsigned Width = TD.getTypeSizeInBits(Expr->getType());
-  bool Precise = isPreciseTruncate(Width);
 
-  if (Precise) {
-    OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType());
+  if (computeModuloForExpr(Expr))
     return OpPWAC;
-  }
 
   auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first));
   auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
@@ -380,28 +361,17 @@ SCEVAffinator::visitZeroExtendExpr(const
   // assumptions and assume the "former negative" piece will not exist.
 
   auto *Op = Expr->getOperand();
-  unsigned Width = TD.getTypeSizeInBits(Op->getType());
-
-  bool Precise = isPreciseZeroExtend(Width);
-
-  auto Flags = getNoWrapFlags(Op);
-  auto NoWrapFlags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
-  bool OpCanWrap = Precise && !(Flags & SCEV::FlagNSW);
-  if (OpCanWrap)
-    Op = setNoWrapFlags(SE, Op, NoWrapFlags);
-
   auto OpPWAC = visit(Op);
-  if (OpCanWrap)
-    OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType());
 
   // If the width is to big we assume the negative part does not occur.
-  if (!Precise) {
+  if (!computeModuloForExpr(Op)) {
     takeNonNegativeAssumption(OpPWAC);
     return OpPWAC;
   }
 
   // If the width is small build the piece for the non-negative part and
   // the one for the negative part and unify them.
+  unsigned Width = TD.getTypeSizeInBits(Op->getType());
   interpretAsUnsigned(OpPWAC, Width);
   return OpPWAC;
 }

Modified: polly/trunk/test/ScopInfo/infeasible-rtc.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/infeasible-rtc.ll?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/infeasible-rtc.ll (original)
+++ polly/trunk/test/ScopInfo/infeasible-rtc.ll Fri Oct 21 13:08:02 2016
@@ -4,24 +4,47 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s \
 ; RUN:  | FileCheck %s -check-prefix=SCOPS
 
-; DETECT: Valid Region for Scop: header => exit
-; SCOPS-NOT: Region: %header---%exit
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; DETECT: Valid Region for Scop: test1.header => test1.exit
+; SCOPS-NOT: Region: %test1.header---%test1.exit
 
 ; Verify that we detect this scop, but that, due to an infeasible run-time
 ; check, we refuse to model it.
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+define void @test(i64* %a) nounwind uwtable {
+preheader:
+  br label %test1.header
+
+test1.header:
+  %i = phi i56 [ 0, %preheader ], [ %i.1, %test1.header ]
+  %tmp = zext i56 %i to i64
+  %A.addr = getelementptr i64, i64* %a, i64 %tmp
+  %A.load = load i64, i64* %A.addr, align 4
+  %A.inc = zext i56 %i to i64
+  %A.val = add nsw i64 %A.load, %A.inc
+  store i64 %A.val, i64* %A.addr, align 4
+  %i.1 = add i56 %i, 1
+  %exitcond = icmp eq i56 %i.1, 0
+  br i1 %exitcond, label %test1.exit, label %test1.header
+
+test1.exit:
+  ret void
+}
+
+; Old version of the previous test; make sure we compute the trip count
+; correctly.
 
- at A = common global [128 x i32] zeroinitializer, align 16
+; SCOPS: { Stmt_header[i0] : 0 <= i0 <= 127 };
 
-define void @test() nounwind uwtable {
+define void @test2([128 x i32]* %a) nounwind uwtable {
 preheader:
   br label %header
 
 header:
   %i = phi i7 [ 0, %preheader ], [ %i.1, %header ]
   %tmp = zext i7 %i to i64
-  %A.addr = getelementptr [128 x i32], [128 x i32]* @A, i64 0, i64 %tmp
+  %A.addr = getelementptr [128 x i32], [128 x i32]* %a, i64 0, i64 %tmp
   %A.load = load i32, i32* %A.addr, align 4
   %A.inc = zext i7 %i to i32
   %A.val = add nsw i32 %A.load, %A.inc

Modified: polly/trunk/test/ScopInfo/integers.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/integers.ll?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/integers.ll (original)
+++ polly/trunk/test/ScopInfo/integers.ll Fri Oct 21 13:08:02 2016
@@ -111,8 +111,24 @@ bb:
   store i3 %indvar, i3* %scevgep, align 8
   %indvar.next = add nsw i3 %indvar, 1
   %sub = sub i3 %n, 3
-; CHECK:  'bb => return' in function 'f6'
-; CHECK: [n] -> { Stmt_bb[0] : n = 3 };
+; CHECK-LABEL:  'bb => return' in function 'f6'
+; CHECK:         Context:
+; CHECK-NEXT:    [n] -> {  : -4 <= n <= 3 }
+; CHECK-NEXT:    Assumed Context:
+; CHECK-NEXT:    [n] -> {  :  }
+; CHECK-NEXT:    Invalid Context:
+; CHECK-NEXT:    [n] -> {  : 1 = 0 }
+
+; CHECK:     Statements {
+; CHECK-NEXT:    Stmt_bb
+; CHECK-NEXT:        Domain :=
+; CHECK-NEXT:            [n] -> { Stmt_bb[i0] : i0 >= 0 and 8*floor((2 - n)/8) >= -5 - n + i0 and 8*floor((2 - n)/8) <= -2 - n };
+; CHECK-NEXT:        Schedule :=
+; CHECK-NEXT:            [n] -> { Stmt_bb[i0] -> [i0] };
+; CHECK-NEXT:        MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:            [n] -> { Stmt_bb[i0] -> MemRef_a[i0] };
+; CHECK-NEXT:}
+
   %exitcond = icmp eq i3 %indvar, %sub
   br i1 %exitcond, label %return, label %bb
 

Modified: polly/trunk/test/ScopInfo/truncate-1.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/truncate-1.ll?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/truncate-1.ll (original)
+++ polly/trunk/test/ScopInfo/truncate-1.ll Fri Oct 21 13:08:02 2016
@@ -5,13 +5,14 @@
 ;        A[i]++;
 ;    }
 ;
+; FIXME: We should the truncate precisely... or just make it a separate parameter.
 ; CHECK:       Assumed Context:
 ; CHECK-NEXT:  [N] -> {  :  }
 ; CHECK-NEXT:  Invalid Context:
-; CHECK-NEXT:  [N] -> {  : 1 = 0 }
+; CHECK-NEXT:  [N] -> { : N <= -129 or N >= 128 }
 ;
 ; CHECK:       Domain :=
-; CHECK-NEXT:    [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 };
+; CHECK-NEXT:    [N] -> { Stmt_for_body[i0] : 0 <= i0 < N };
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 

Modified: polly/trunk/test/ScopInfo/truncate-2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/truncate-2.ll?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/truncate-2.ll (original)
+++ polly/trunk/test/ScopInfo/truncate-2.ll Fri Oct 21 13:08:02 2016
@@ -5,15 +5,16 @@
 ;        A[(char)(N)]++;
 ;    }
 ;
+; FIXME: We should the truncate precisely... or just make it a separate parameter.
 ; CHECK:       Assumed Context:
 ; CHECK-NEXT:  [N] -> {  :  }
 ; CHECK-NEXT:  Invalid Context:
-; CHECK-NEXT:  [N] -> {  : 1 = 0 }
+; CHECK-NEXT:  [N] -> { : N >= 128 }
 ;
 ; CHECK:         ReadAccess :=	[Reduction Type: +] [Scalar: 0]
-; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[N] };
 ; CHECK-NEXT:    MustWriteAccess :=	[Reduction Type: +] [Scalar: 0]
-; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+; CHECK-NEXT:        [N] -> { Stmt_for_body[i0] -> MemRef_A[N] };
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 

Modified: polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll?rev=284848&r1=284847&r2=284848&view=diff
==============================================================================
--- polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll (original)
+++ polly/trunk/test/ScopInfo/zero_ext_of_truncate.ll Fri Oct 21 13:08:02 2016
@@ -9,13 +9,14 @@
 ;      }
 ;    }
 ;
+; FIXME: The truncated value should be a paramter.
 ; CHECK:         Assumed Context:
 ; CHECK-NEXT:    [N, tmp, M] -> { : }
 ; CHECK-NEXT:    Invalid Context:
-; CHECK-NEXT:    [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
+; CHECK-NEXT:    [N, tmp, M] -> { : N < 0 or (N > 0 and tmp >= 128) or (N > 0 and tmp < 0) or (N > 0 and M < 0) }
 ;
 ; CHECK:         Domain :=
-; CHECK-NEXT:    [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
+; CHECK-NEXT:    [N, tmp, M] -> { Stmt_if_then[i0] : M > tmp and 0 <= i0 < N };
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"