[llvm-commits] [llvm] r102157 - in /llvm/trunk: lib/Analysis/README.txt lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/lsr-delayed-fold.ll
Dan Gohman
gohman at apple.com
Thu Apr 22 18:55:05 PDT 2010
Author: djg
Date: Thu Apr 22 20:55:05 2010
New Revision: 102157
URL: http://llvm.org/viewvc/llvm-project?rev=102157&view=rev
Log:
Fix LSR to tolerate cases where ScalarEvolution initially
misses an opportunity to fold add operands, but folds them
after LSR has separated them out. This fixes rdar://7886751.
Added:
llvm/trunk/test/CodeGen/X86/lsr-delayed-fold.ll
Modified:
llvm/trunk/lib/Analysis/README.txt
llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Modified: llvm/trunk/lib/Analysis/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/README.txt?rev=102157&r1=102156&r2=102157&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/README.txt (original)
+++ llvm/trunk/lib/Analysis/README.txt Thu Apr 22 20:55:05 2010
@@ -16,3 +16,15 @@
 which is very inefficient when expanded into code.
 
 //===---------------------------------------------------------------------===//
+
+In test/CodeGen/X86/lsr-delayed-fold.ll,
+
+ScalarEvolution is forming this expression:
+
+((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
+
+This could be folded to
+
+(-1 * (trunc i64 undef to i32))
+
+//===---------------------------------------------------------------------===//
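For background (this note is not part of the commit): the fold above is valid
because truncation to i32 distributes over addition, so the
(trunc i64 (-1 * %arg5) to i32) and (trunc i64 %arg5 to i32) operands cancel
modulo 2^32, leaving only the third term. A minimal self-contained C++ sketch
of that arithmetic, with fixed-width integers standing in for the SCEV
expressions:

#include <cassert>
#include <cstdint>

int main() {
  // Stand-in for %arg5; any value works, since the cancellation is mod 2^32.
  std::uint64_t arg5 = 0x123456789ABCDEF0ULL;

  std::uint32_t a = static_cast<std::uint32_t>(0 - arg5); // trunc i64 (-1 * %arg5) to i32
  std::uint32_t b = static_cast<std::uint32_t>(arg5);     // trunc i64 %arg5 to i32

  // trunc(-x) + trunc(x) == trunc(-x + x) == 0 (mod 2^32), so only the
  // (-1 * (trunc i64 undef to i32)) term survives the fold.
  assert(static_cast<std::uint32_t>(a + b) == 0);
  return 0;
}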
Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=102157&r1=102156&r2=102157&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Thu Apr 22 20:55:05 2010
@@ -2060,8 +2060,11 @@
                           LU.Kind, LU.AccessTy, TLI, SE))
         continue;
 
+      const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+      if (InnerSum->isZero())
+        continue;
       Formula F = Base;
-      F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+      F.BaseRegs[i] = InnerSum;
       F.BaseRegs.push_back(*J);
       if (InsertFormula(LU, LUIdx, F))
         // If that formula hadn't been seen before, recurse to find more like
Added: llvm/trunk/test/CodeGen/X86/lsr-delayed-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-delayed-fold.ll?rev=102157&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-delayed-fold.ll (added)
+++ llvm/trunk/test/CodeGen/X86/lsr-delayed-fold.ll Thu Apr 22 20:55:05 2010
@@ -0,0 +1,28 @@
+; RUN: llc -march=x86-64 < %s > /dev/null
+; rdar://7886751
+
+; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
+; but LSR should tolerate this.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0"
+
+define fastcc void @formatValue(i64 %arg5) nounwind {
+bb12: ; preds = %bb11
+ %t = trunc i64 %arg5 to i32 ; <i32> [#uses=1]
+ %t13 = sub i64 0, %arg5 ; <i64> [#uses=1]
+ %t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1]
+ br label %bb15
+
+bb15: ; preds = %bb21, %bb12
+ %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2]
+ %t17 = mul i64 %t14, %t16 ; <i64> [#uses=1]
+ %t18 = add i64 undef, %t17 ; <i64> [#uses=1]
+ %t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1]
+ %t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1]
+ %t23 = add i64 %t16, 1 ; <i64> [#uses=1]
+ br i1 %t22, label %bb24, label %bb15
+
+bb24: ; preds = %bb21, %bb11
+ unreachable
+}
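As its RUN line indicates, the test only checks that llc -march=x86-64
compiles this function without failing; the output goes to /dev/null because
no particular code sequence is being verified, only that LSR now tolerates
ScalarEvolution folding the separated add operands after the fact.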