[llvm-commits] [llvm] r96692 - in /llvm/trunk: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/lsr-wrap.ll

Fri Feb 19 11:32:50 PST 2010

Author: djg
Date: Fri Feb 19 13:32:49 2010
New Revision: 96692

URL: http://llvm.org/viewvc/llvm-project?rev=96692&view=rev
Log:
Check for overflow when scaling up an add or an addrec for
scaled reuse.

Added:
    llvm/trunk/test/CodeGen/X86/lsr-wrap.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=96692&r1=96691&r2=96692&view=diff

==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Fri Feb 19 13:32:49 2010
@@ -337,6 +337,33 @@
   print(errs()); errs() << '\n';
 }
 
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(AR->getType()) + 1);
+  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(A->getType()) + 1);
+  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+  const Type *WideTy =
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(A->getType()) + 1);
+  return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
 /// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
 /// or null otherwise. If IgnoreSignificantBits is true, expressions like
 /// (X * Y) /s Y are simplified to X, ignoring that the multiplication may
@@ -365,33 +392,37 @@
                .sdiv(RC->getValue()->getValue()));
   }
 
-  // Distribute the sdiv over addrec operands.
+  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
-    const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
-                                IgnoreSignificantBits);
-    if (!Start) return 0;
-    const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
-                               IgnoreSignificantBits);
-    if (!Step) return 0;
-    return SE.getAddRecExpr(Start, Step, AR->getLoop());
+    if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+      const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
+                                  IgnoreSignificantBits);
+      if (!Start) return 0;
+      const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
+                                 IgnoreSignificantBits);
+      if (!Step) return 0;
+      return SE.getAddRecExpr(Start, Step, AR->getLoop());
+    }
   }
 
-  // Distribute the sdiv over add operands.
+  // Distribute the sdiv over add operands, if the add doesn't overflow.
   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
-    SmallVector<const SCEV *, 8> Ops;
-    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
-         I != E; ++I) {
-      const SCEV *Op = getSDiv(*I, RHS, SE,
-                               IgnoreSignificantBits);
-      if (!Op) return 0;
-      Ops.push_back(Op);
+    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+      SmallVector<const SCEV *, 8> Ops;
+      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+           I != E; ++I) {
+        const SCEV *Op = getSDiv(*I, RHS, SE,
+                                 IgnoreSignificantBits);
+        if (!Op) return 0;
+        Ops.push_back(Op);
+      }
+      return SE.getAddExpr(Ops);
     }
-    return SE.getAddExpr(Ops);
   }
 
   // Check for a multiply operand that we can pull RHS out of.
   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
-    if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
+    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
       SmallVector<const SCEV *, 4> Ops;
       bool Found = false;
       for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();

Added: llvm/trunk/test/CodeGen/X86/lsr-wrap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-wrap.ll?rev=96692&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-wrap.ll (added)
+++ llvm/trunk/test/CodeGen/X86/lsr-wrap.ll Fri Feb 19 13:32:49 2010
@@ -0,0 +1,37 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; LSR would like to use a single IV for both of these, however it's
+; not safe due to wraparound.
+
+; CHECK: addb  $-4, %r
+; CHECK: decw  %
+
+@g_19 = common global i32 0                       ; <i32*> [#uses=2]
+
+declare i32 @func_8(i8 zeroext) nounwind
+
+declare i32 @func_3(i8 signext) nounwind
+
+define void @func_1() nounwind {
+entry:
+  br label %bb
+
+bb:                                               ; preds = %bb, %entry
+  %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2]
+  %tmp = sub i16 0, %indvar                       ; <i16> [#uses=1]
+  %tmp27 = trunc i16 %tmp to i8                   ; <i8> [#uses=1]
+  %tmp1 = load i32* @g_19, align 4                ; <i32> [#uses=2]
+  %tmp2 = add i32 %tmp1, 1                        ; <i32> [#uses=1]
+  store i32 %tmp2, i32* @g_19, align 4
+  %tmp3 = trunc i32 %tmp1 to i8                   ; <i8> [#uses=1]
+  %tmp4 = tail call i32 @func_8(i8 zeroext %tmp3) nounwind ; <i32> [#uses=0]
+  %tmp5 = shl i8 %tmp27, 2                        ; <i8> [#uses=1]
+  %tmp6 = add i8 %tmp5, -112                      ; <i8> [#uses=1]
+  %tmp7 = tail call i32 @func_3(i8 signext %tmp6) nounwind ; <i32> [#uses=0]
+  %indvar.next = add i16 %indvar, 1               ; <i16> [#uses=2]
+  %exitcond = icmp eq i16 %indvar.next, -28       ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return:                                           ; preds = %bb
+  ret void
+}