[llvm-commits] [llvm] r66358 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/2009-03-07-FPConstSelect.ll test/CodeGen/X86/pic-cpool.ll

Tue Mar 10 13:42:54 PDT 2009

Hi Chris,

This is breaking povray, 450.soplex, and 456.hmmer on x86 Darwin.

Evan

On Mar 7, 2009, at 5:51 PM, Chris Lattner wrote:

> Author: lattner
> Date: Sat Mar  7 19:51:30 2009
> New Revision: 66358
>
> URL: http://llvm.org/viewvc/llvm-project?rev=66358&view=rev
> Log:
> implement an optimization to codegen c ? 1.0 : 2.0 as load { 2.0,  
> 1.0 } + c*4.
> For 2009-03-07-FPConstSelect.ll we now produce:
>
> _f:
> 	xorl	%eax, %eax
> 	testl	%edi, %edi
> 	movl	$4, %ecx
> 	cmovne	%rax, %rcx
> 	leaq	LCPI1_0(%rip), %rax
> 	movss	(%rcx,%rax), %xmm0
> 	ret
>
> previously we produced:
>
> _f:
> 	subl	$4, %esp
> 	cmpl	$0, 8(%esp)
> 	movss	LCPI1_0, %xmm0
> 	je	LBB1_2	## entry
> LBB1_1:	## entry
> 	movss	LCPI1_1, %xmm0
> LBB1_2:	## entry
> 	movss	%xmm0, (%esp)
> 	flds	(%esp)
> 	addl	$4, %esp
> 	ret
>
> on PPC the code also improves to:
>
> _f:
> 	cntlzw r2, r3
> 	srwi r2, r2, 5
> 	li r3, lo16(LCPI1_0)
> 	slwi r2, r2, 2
> 	addis r3, r3, ha16(LCPI1_0)
> 	lfsx f1, r3, r2
> 	blr
>
> from:
>
> _f:
> 	li r2, lo16(LCPI1_1)
> 	cmplwi cr0, r3, 0
> 	addis r2, r2, ha16(LCPI1_1)
> 	beq cr0, LBB1_2	; entry
> LBB1_1:	; entry
> 	li r2, lo16(LCPI1_0)
> 	addis r2, r2, ha16(LCPI1_0)
> LBB1_2:	; entry
> 	lfs f1, 0(r2)
> 	blr
>
> This also improves the existing pic-cpool case from:
>
> foo:
> 	subl	$12, %esp
> 	call	.Lllvm$1.$piclabel
> .Lllvm$1.$piclabel:
> 	popl	%eax
> 	addl	$_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
> 	cmpl	$0, 16(%esp)
> 	movsd	.LCPI1_0@GOTOFF(%eax), %xmm0
> 	je	.LBB1_2	# entry
> .LBB1_1:	# entry
> 	movsd	.LCPI1_1@GOTOFF(%eax), %xmm0
> .LBB1_2:	# entry
> 	movsd	%xmm0, (%esp)
> 	fldl	(%esp)
> 	addl	$12, %esp
> 	ret
>
> to:
>
> foo:
> 	call	.Lllvm$1.$piclabel
> .Lllvm$1.$piclabel:
> 	popl	%eax
> 	addl	$_GLOBAL_OFFSET_TABLE_ + [.-.Lllvm$1.$piclabel], %eax
> 	xorl	%ecx, %ecx
> 	cmpl	$0, 4(%esp)
> 	movl	$8, %edx
> 	cmovne	%ecx, %edx
> 	fldl	.LCPI1_0@GOTOFF(%eax,%edx)
> 	ret
>
> This triggers a few dozen times in spec FP 2000.
>
>
> Added:
>    llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
> Modified:
>    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>    llvm/trunk/test/CodeGen/X86/pic-cpool.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=66358&r1=66357&r2=66358&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Mar  7 19:51:30 2009
> @@ -14,8 +14,10 @@
>
> #define DEBUG_TYPE "dagcombine"
> #include "llvm/CodeGen/SelectionDAG.h"
> +#include "llvm/DerivedTypes.h"
> #include "llvm/CodeGen/MachineFunction.h"
> #include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/PseudoSourceValue.h"
> #include "llvm/Analysis/AliasAnalysis.h"
> #include "llvm/Target/TargetData.h"
> #include "llvm/Target/TargetFrameInfo.h"
> @@ -2890,8 +2892,7 @@
>       return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
>                          N0.getOperand(0), N0.getOperand(1),
>                          N1, N2, N0.getOperand(2));
> -    else
> -      return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
> +    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
>   }
>
>   return SDValue();
> @@ -5674,9 +5675,14 @@
>   return false;
> }
>
> +/// SimplifySelectCC - Simplify an expression of the form (N0 cond  
> N1) ? N2 : N3
> +/// where 'cond' is the comparison specified by CC.
> SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0,  
> SDValue N1,
>                                       SDValue N2, SDValue N3,
>                                       ISD::CondCode CC, bool  
> NotExtCompare) {
> +  // (x ? y : y) -> y.
> +  if (N2 == N3) return N2;
> +
>   MVT VT = N2.getValueType();
>   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
>   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
> @@ -5712,6 +5718,51 @@
>         return DAG.getNode(ISD::FABS, DL, VT, N3);
>     }
>   }
> +
> +  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond  
> b) ? 0 : 4)"
> +  // where "tmp" is a constant pool entry containing an array with  
> 1.0 and 2.0
> +  // in it.  This is a win when the constant is not otherwise  
> available because
> +  // it replaces two constant pool loads with one.  We only do this  
> if the FP
> +  // type is known to be legal, because if it isn't, then we are  
> before legalize
> +  // types an we want the other legalization to happen first (e.g.  
> to avoid
> +  // messing with soft float).
> +  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
> +    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
> +      if (TLI.isTypeLegal(N2.getValueType()) &&
> +          // If both constants have multiple uses, then we won't  
> need to do an
> +          // extra load, they are likely around in registers for  
> other users.
> +          (TV->hasOneUse() || FV->hasOneUse())) {
> +        Constant *Elts[] = {
> +          const_cast<ConstantFP*>(FV->getConstantFPValue()),
> +          const_cast<ConstantFP*>(TV->getConstantFPValue())
> +        };
> +        // Create a ConstantArray of the two constants.
> +        Constant *CA =
> +          ConstantArray::get(ArrayType::get(Elts[0]->getType(), 2),  
> Elts, 2);
> +        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy());
> +        unsigned Alignment =
> +          1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
> +
> +        // Get the offsets to the 0 and 1 element of the array so  
> that we can
> +        // select between them.
> +        SDValue Zero = DAG.getIntPtrConstant(0);
> +        unsigned EltSize =
> +          (unsigned)TLI.getTargetData()->getTypePaddedSize(Elts[0]- 
> >getType());
> +        SDValue One = DAG.getIntPtrConstant(EltSize);
> +
> +        SDValue Cond = DAG.getSetCC(DL,
> +                                    TLI.getSetCCResultType 
> (N0.getValueType()),
> +                                    N0, N1, CC);
> +        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL,  
> Zero.getValueType(),
> +                                        Cond, One, Zero);
> +        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
> +                            CstOffset);
> +        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode 
> (), CPIdx,
> +                           PseudoSourceValue::getConstantPool(), 0,  
> false,
> +                           Alignment);
> +
> +      }
> +    }
>
>   // Check to see if we can perform the "gzip trick", transforming
>   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
>
> Added: llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll?rev=66358&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/2009-03-07-FPConstSelect.ll Sat Mar  7 19:51:30 2009
> @@ -0,0 +1,12 @@
> +; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep xmm
> +; This should do a single load into the fp stack for the return,  
> not diddle with xmm registers.
> +
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32- 
> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64- 
> f80:128:128"
> +target triple = "i386-apple-darwin7"
> +
> +define float @f(i32 %x) nounwind readnone {
> +entry:
> +	%0 = icmp eq i32 %x, 0		; <i1> [#uses=1]
> +	%iftmp.0.0 = select i1 %0, float 4.200000e+01, float 2.300000e+01
> +	ret float %iftmp.0.0
> +}
>
> Modified: llvm/trunk/test/CodeGen/X86/pic-cpool.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic-cpool.ll?rev=66358&r1=66357&r2=66358&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pic-cpool.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/pic-cpool.ll Sat Mar  7 19:51:30 2009
> @@ -2,10 +2,10 @@
> ; RUN:   -o %t -f
> ; RUN: grep _GLOBAL_OFFSET_TABLE_ %t
> ; RUN: grep piclabel %t | count 3
> -; RUN: grep GOTOFF %t | count 2
> -; RUN: grep CPI %t | count 4
> +; RUN: grep GOTOFF %t | count 1
> +; RUN: grep CPI %t | count 2
>
> -define double @foo(i32 %a.u) {
> +define double @foo(i32 %a.u) nounwind {
> entry:
>     %tmp = icmp eq i32 %a.u,0
>     %retval = select i1 %tmp, double 4.561230e+02, double 1.234560e+02
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits