[llvm-commits] [llvm] r45399 - in /llvm/trunk: lib/Target/X86/README.txt lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/fp-stack-ret-store.ll

Fri Dec 28 22:41:28 PST 2007

Author: lattner
Date: Sat Dec 29 00:41:28 2007
New Revision: 45399

URL: http://llvm.org/viewvc/llvm-project?rev=45399&view=rev
Log:
Avoid going through a stack slot to convert from fpstack to an XMM reg
if we are just going to store the value back to memory anyway.  This
improves code like:
double foo();
void bar(double *P) { *P = foo(); }


Added:
    llvm/trunk/test/CodeGen/X86/fp-stack-ret-store.ll
Modified:
    llvm/trunk/lib/Target/X86/README.txt
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README.txt?rev=45399&r1=45398&r2=45399&view=diff

==============================================================================

--- llvm/trunk/lib/Target/X86/README.txt (original)
+++ llvm/trunk/lib/Target/X86/README.txt Sat Dec 29 00:41:28 2007
@@ -1636,24 +1636,3 @@
 This would result in smaller code and more efficient microops.
 
 //===---------------------------------------------------------------------===//
-
-We should be smarter about conversion from fpstack to XMM regs.
-
-double foo();
-void bar(double *P) { *P = foo(); }
-
-We compile that to:
-
-_bar:
-	subl	$12, %esp
-	call	L_foo$stub
-	fstpl	(%esp)
-	movl	16(%esp), %eax
-	movsd	(%esp), %xmm0
-	movsd	%xmm0, (%eax)
-	addl	$12, %esp
-	ret
-
-for example.  The magic to/from the stack is unneeded.
-
-//===---------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=45399&r1=45398&r2=45399&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 29 00:41:28 2007
@@ -33,7 +33,6 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallSet.h"
@@ -812,7 +811,6 @@
   CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
   CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
 
-  
   SmallVector<SDOperand, 8> ResultVals;
   
   // Copy all of the result registers out of their specified physreg.
@@ -838,17 +836,50 @@
     // an XMM register.
     if ((X86ScalarSSEf32 && RVLocs[0].getValVT() == MVT::f32) ||
         (X86ScalarSSEf64 && RVLocs[0].getValVT() == MVT::f64)) {
+      SDOperand StoreLoc;
+      const Value *SrcVal = 0;
+      int SrcValOffset = 0;
+      
+      // Determine where to store the value.  If the call result is directly
+      // used by a store, see if we can store directly into the location.  In
+      // this case, we'll end up producing a fst + movss[load] + movss[store] to
+      // the same location, and the two movss's will be nuked as dead.  This
+      // optimizes common things like "*D = atof(..)" to not need an
+      // intermediate stack slot.
+      if (SDOperand(TheCall, 0).hasOneUse() && 
+          SDOperand(TheCall, 1).hasOneUse()) {
+        // Ok, we have one use of the value and one use of the chain.  See if
+        // they are the same node: a store.
+        if (StoreSDNode *N = dyn_cast<StoreSDNode>(*TheCall->use_begin())) {
+          if (N->getChain().Val == TheCall && N->getValue().Val == TheCall &&
+              !N->isVolatile() && !N->isTruncatingStore() && 
+              N->getAddressingMode() == ISD::UNINDEXED) {
+            StoreLoc = N->getBasePtr();
+            SrcVal = N->getSrcValue();
+            SrcValOffset = N->getSrcValueOffset();
+          }
+        }
+      }
+
+      // If we weren't able to optimize the result, just create a temporary
+      // stack slot.
+      if (StoreLoc.Val == 0) {
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+        StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
+      }
+      
       // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
       // shouldn't be necessary except that RFP cannot be live across
-      // multiple blocks. When stackifier is fixed, they can be uncoupled.
-      MachineFunction &MF = DAG.getMachineFunction();
-      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
-      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+      // multiple blocks (which could happen if a select gets lowered into
+      // multiple blocks and scheduled in between them). When stackifier is
+      // fixed, they can be uncoupled.
       SDOperand Ops[] = {
-        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
+        Chain, RetVal, StoreLoc, DAG.getValueType(RVLocs[0].getValVT()), InFlag
       };
       Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
-      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
+      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain,
+                           StoreLoc, SrcVal, SrcValOffset);
       Chain = RetVal.getValue(1);
     }
     ResultVals.push_back(RetVal);

Added: llvm/trunk/test/CodeGen/X86/fp-stack-ret-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-ret-store.ll?rev=45399&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-stack-ret-store.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fp-stack-ret-store.ll Sat Dec 29 00:41:28 2007
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc | not grep movss
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+; This should store directly into P from the FP stack.  It should not
+; go through a stack slot to get there.
+
+define void @bar(double* %P) {
+entry:
+	%tmp = tail call double (...)* @foo( )		; <double> [#uses=1]
+	store double %tmp, double* %P, align 8
+	ret void
+}
+
+declare double @foo(...)