[llvm-commits] [llvm] r66358 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/2009-03-07-FPConstSelect.ll test/CodeGen/X86/pic-cpool.ll
Chris Lattner
sabre at nondot.org
Sat Mar 7 17:51:31 PST 2009
Author: lattner
Date: Sat Mar 7 19:51:30 2009
New Revision: 66358
URL: http://llvm.org/viewvc/llvm-project?rev=66358&view=rev
Log:
implement an optimization to codegen c ? 1.0 : 2.0 as load { 2.0, 1.0 } + c*4.
For 2009-03-07-FPConstSelect.ll we now produce:
_f:
xorl %eax, %eax
testl %edi, %edi
movl $4, %ecx
cmovne %rax, %rcx
leaq LCPI1_0(%rip), %rax
movss (%rcx,%rax), %xmm0
ret
previously we produced:
_f:
subl $4, %esp
cmpl $0, 8(%esp)
movss LCPI1_0, %xmm0
je LBB1_2 ## entry
LBB1_1: ## entry
movss LCPI1_1, %xmm0
LBB1_2: ## entry
movss %xmm0, (%esp)
flds (%esp)
addl $4, %esp
ret
on PPC the code also improves to:
_f:
cntlzw r2, r3
srwi r2, r2, 5
li r3, lo16(LCPI1_0)
slwi r2, r2, 2
addis r3, r3, ha16(LCPI1_0)
lfsx f1, r3, r2
blr
from:
_f:
li r2, lo16(LCPI1_1)
cmplwi cr0, r3, 0
addis r2, r2, ha16(LCPI1_1)
beq cr0, LBB1_2 ; entry
LBB1_1: ; entry
li r2, lo16(LCPI1_0)
addis r2, r2, ha16(LCPI1_0)
LBB1_2: ; entry
lfs f1, 0(r2)
blr
This also improves the existing pic-cpool case from:
foo:
subl $12, %esp
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
cmpl $0, 16(%esp)
movsd .LCPI1_0 at GOTOFF(%eax), %xmm0
je .LBB1_2 # entry
.LBB1_1: # entry
movsd .LCPI1_1 at GOTOFF(%eax), %xmm0
.LBB1_2: # entry
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
ret
to:
foo:
call .Lllvm$1.$piclabel
.Lllvm$1.$piclabel:
popl %eax
addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
xorl %ecx, %ecx
cmpl $0, 4(%esp)
movl $8, %edx
cmovne %ecx, %edx
fldl .LCPI1_0 at GOTOFF(%eax,%edx)
ret
This triggers a few dozen times in spec FP 2000.
Added:
llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/pic-cpool.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=66358&r1=66357&r2=66358&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Mar 7 19:51:30 2009
@@ -14,8 +14,10 @@
#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
@@ -2890,8 +2892,7 @@
return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
- else
- return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
}
return SDValue();
@@ -5674,9 +5675,14 @@
return false;
}
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3,
ISD::CondCode CC, bool NotExtCompare) {
+ // (x ? y : y) -> y.
+ if (N2 == N3) return N2;
+
MVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -5712,6 +5718,51 @@
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
+
+ // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
+ // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+ // in it. This is a win when the constant is not otherwise available because
+ // it replaces two constant pool loads with one. We only do this if the FP
+ // type is known to be legal, because if it isn't, then we are before legalize
+ // types an we want the other legalization to happen first (e.g. to avoid
+ // messing with soft float).
+ if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+ if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+ if (TLI.isTypeLegal(N2.getValueType()) &&
+ // If both constants have multiple uses, then we won't need to do an
+ // extra load, they are likely around in registers for other users.
+ (TV->hasOneUse() || FV->hasOneUse())) {
+ Constant *Elts[] = {
+ const_cast<ConstantFP*>(FV->getConstantFPValue()),
+ const_cast<ConstantFP*>(TV->getConstantFPValue())
+ };
+ // Create a ConstantArray of the two constants.
+ Constant *CA =
+ ConstantArray::get(ArrayType::get(Elts[0]->getType(), 2), Elts, 2);
+ SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy());
+ unsigned Alignment =
+ 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+ // Get the offsets to the 0 and 1 element of the array so that we can
+ // select between them.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ unsigned EltSize =
+ (unsigned)TLI.getTargetData()->getTypePaddedSize(Elts[0]->getType());
+ SDValue One = DAG.getIntPtrConstant(EltSize);
+
+ SDValue Cond = DAG.getSetCC(DL,
+ TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC);
+ SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+ Cond, One, Zero);
+ CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+ CstOffset);
+ return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false,
+ Alignment);
+
+ }
+ }
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
Added: llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll?rev=66358&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll Sat Mar 7 19:51:30 2009
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
+; This should do a single load into the fp stack for the return, not diddle with xmm registers.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin7"
+
+define float @f(i32 %x) nounwind readnone {
+entry:
+ %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
+ %iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01
+ ret float %iftmp.0.0
+}
Modified: llvm/trunk/test/CodeGen/X86/pic-cpool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic-cpool.ll?rev=66358&r1=66357&r2=66358&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pic-cpool.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pic-cpool.ll Sat Mar 7 19:51:30 2009
@@ -2,10 +2,10 @@
; RUN: -o %t -f
; RUN: grep _GLOBAL_OFFSET_TABLE_ %t
; RUN: grep piclabel %t | count 3
-; RUN: grep GOTOFF %t | count 2
-; RUN: grep CPI %t | count 4
+; RUN: grep GOTOFF %t | count 1
+; RUN: grep CPI %t | count 2
-define double @foo(i32 %a.u) {
+define double @foo(i32 %a.u) nounwind {
entry:
%tmp = icmp eq i32 %a.u,0
%retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
More information about the llvm-commits
mailing list