[llvm] r225034 - InstCombine: try to transform A-B < 0 into A < B

Wed Jan 14 05:25:11 PST 2015

Hi David,

It turns out this is causing quite a lot of performance regressions.

One of the issues is that this messes up IndVar simplification.

For this code:
int foo (int *p, int M, int N, int K)
{
    int ret = 0;
    for (int i = 0; i < M-N; i++)
        ret += p[i * K];

    return ret;
}

What we get coming into IndVar Simplify before the patch is:
define i32 @foo(i32* nocapture readonly %p, i32 %M, i32 %N, i32 %K) #0 {
entry:
  %sub = sub nsw i32 %M, %N
  %cmp4 = icmp sgt i32 %sub, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.body
  %i.06 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %ret.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  %mul = mul nsw i32 %i.06, %K
  %arrayidx = getelementptr inbounds i32* %p, i32 %mul
  %0 = load i32* %arrayidx, align 4, !tbaa !1
  %add = add nsw i32 %0, %ret.05
  %inc = add nsw i32 %i.06, 1
  %cmp = icmp slt i32 %inc, %sub
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  %ret.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %ret.0.lcssa
}

IndVar Simplify manages to compute the SCEV for the induction variable:
INDVARS: Rewriting loop exit condition to:
      LHS:  %i.06 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
       op:	!=
      RHS:	  %1 = sub i32 %0, %N
  IVCount:	(-1 + (-1 * %N) + %M)

The condition on the backedge gets replaced with an "icmp ne", and the final assembly for the loop body is:
.LBB0_2:
	addl	(%esi), %eax
	addl	%edx, %esi
	decl	%ecx
	jne	.LBB0_2

With r225034, IndVar Simplify doesn't fire, we drag the "icmp slt" until the end, and end up with:
.LBB0_2:
	addl	(%esi), %eax
	incl	%edi
	addl	%edx, %esi
	cmpl	%ecx, %edi
	jl	.LBB0_2

Note that this is not the only issue we're seeing, some of the regressions don't appear to be IndVar-related.
It may be a good idea to revert r225034 and do more extensive performance testing before re-committing.

Thanks,
   Michael

-----Original Message-----
From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-bounces at cs.uiuc.edu] On Behalf Of David Majnemer
Sent: Wednesday, December 31, 2014 06:22
To: llvm-commits at cs.uiuc.edu
Subject: [llvm] r225034 - InstCombine: try to transform A-B < 0 into A < B

Author: majnemer
Date: Tue Dec 30 22:21:41 2014
New Revision: 225034

URL: http://llvm.org/viewvc/llvm-project?rev=225034&view=rev
Log:
InstCombine: try to transform A-B < 0 into A < B

We are allowed to move the 'B' to the right hand side if we an prove there is no signed overflow and if the comparison itself is signed.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/icmp.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=225034&r1=225033&r2=225034&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Tue 
+++ Dec 30 22:21:41 2014
@@ -2692,6 +2692,26 @@ Instruction *InstCombiner::visitICmpInst
       return new ICmpInst(I.getPredicate(), A, B);
     }
 
+    // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B)
+    if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() &&
+        match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+      return new ICmpInst(ICmpInst::ICMP_SGE, A, B);
+
+    // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B)
+    if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() &&
+        match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+      return new ICmpInst(ICmpInst::ICMP_SGT, A, B);
+
+    // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B)
+    if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() &&
+        match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+      return new ICmpInst(ICmpInst::ICMP_SLT, A, B);
+
+    // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B)
+    if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() &&
+        match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
+      return new ICmpInst(ICmpInst::ICMP_SLE, A, B);
+
     // If we have an icmp le or icmp ge instruction, turn it into the
     // appropriate icmp lt or icmp gt instruction.  This allows us to rely on
     // them being folded in the code below.  The SimplifyICmpInst code has

Modified: llvm/trunk/test/Transforms/InstCombine/icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/icmp.ll?rev=225034&r1=225033&r2=225034&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/icmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/icmp.ll Tue Dec 30 22:21:41 
+++ 2014
@@ -1522,3 +1522,39 @@ define zeroext i1 @icmp_cmpxchg_strong(i
   %icmp = icmp eq i32 %xtrc, %old_val
   ret i1 %icmp
 }
+
+; CHECK-LABEL: @f1
+; CHECK-NEXT: %[[cmp:.*]] = icmp sge i64 %a, %b ; CHECK-NEXT: ret i1 
+%[[cmp]] define i1 @f1(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sge i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f2
+; CHECK-NEXT: %[[cmp:.*]] = icmp sgt i64 %a, %b ; CHECK-NEXT: ret i1 
+%[[cmp]] define i1 @f2(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sgt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f3
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i64 %a, %b ; CHECK-NEXT: ret i1 
+%[[cmp]] define i1 @f3(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp slt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f4
+; CHECK-NEXT: %[[cmp:.*]] = icmp sle i64 %a, %b ; CHECK-NEXT: ret i1 
+%[[cmp]] define i1 @f4(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sle i64 %t, 0
+  ret i1 %v
+}


_______________________________________________
llvm-commits mailing list
llvm-commits at cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.