[llvm-commits] [llvm] r154119 - in /llvm/trunk: lib/CodeGen/SelectionDAG/TargetLowering.cpp test/CodeGen/Thumb2/lsr-deficiency.ll test/CodeGen/X86/lsr-loop-exit-cond.ll
Jakob Stoklund Olesen
stoklund at 2pi.dk
Thu Apr 5 13:30:20 PDT 2012
Author: stoklund
Date: Thu Apr 5 15:30:20 2012
New Revision: 154119
URL: http://llvm.org/viewvc/llvm-project?rev=154119&view=rev
Log:
Don't break the IV update in TLI::SimplifySetCC().
LSR always tries to make the ICmp in the loop latch use the incremented
induction variable. This allows the induction variable to be kept in a
single register.
When the induction variable limit is equal to the stride,
SimplifySetCC() would break LSR's hard work by transforming:
(icmp (add iv, stride), stride) --> (cmp iv, 0)
This forced us to use lea for the IC update, preventing the simpler
incl+cmp.
<rdar://problem/7643606>
<rdar://problem/11184260>
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/Thumb2/lsr-deficiency.ll
llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=154119&r1=154118&r2=154119&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Thu Apr 5 15:30:20 2012
@@ -2471,6 +2471,10 @@
}
}
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2505,25 +2509,33 @@
Cond);
}
}
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// Simplify (X+Z) == X --> Z == 0
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
}
}
}
Modified: llvm/trunk/test/CodeGen/Thumb2/lsr-deficiency.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/lsr-deficiency.ll?rev=154119&r1=154118&r2=154119&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/lsr-deficiency.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/lsr-deficiency.ll Thu Apr 5 15:30:20 2012
@@ -3,11 +3,6 @@
; This now reduces to a single induction variable.
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
@G = external global i32 ; <i32*> [#uses=2]
@array = external global i32* ; <i32**> [#uses=1]
@@ -20,9 +15,9 @@
bb: ; preds = %bb, %entry
; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=154119&r1=154118&r2=154119&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Thu Apr 5 15:30:20 2012
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
+; CHECK: t:
; CHECK: decq
; CHECK-NEXT: movl (
; CHECK-NEXT: jne
@@ -136,3 +137,44 @@
store i8 %92, i8* %93, align 1
ret void
}
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+ %cmp4 = icmp eq i32 %i, 1
+ br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %0 = sext i32 %i to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+ %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4
+ %cmp1 = icmp ugt i32 %1, %b.05
+ %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+ %2 = trunc i64 %indvars.iv to i32
+ %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+ ret i32 %bi.0.lcssa
+}
+
More information about the llvm-commits
mailing list