[llvm-commits] [llvm] r47470 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/mmx-copy-gprs.ll

Chris Lattner sabre at nondot.org
Thu Feb 21 18:09:43 PST 2008


Author: lattner
Date: Thu Feb 21 20:09:43 2008
New Revision: 47470

URL: http://llvm.org/viewvc/llvm-project?rev=47470&view=rev
Log:
Start using GPRs to copy around MMX values instead of MMX regs.
GCC apparently does this, and code depends on not having to
execute emms when this happens.  This is x86-64 only so far; the
second half should handle x86-32.

rdar://5741668

Added:
    llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=47470&r1=47469&r2=47470&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 21 20:09:43 2008
@@ -704,6 +704,7 @@
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::SELECT);
+  setTargetDAGCombine(ISD::STORE);
 
   computeRegisterProperties();
 
@@ -5872,6 +5873,35 @@
   return SDOperand();
 }
 
+/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
+static SDOperand PerformSTORECombine(StoreSDNode *St, SelectionDAG &DAG,
+                                     const X86Subtarget *Subtarget) {
+  // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
+  // the FP state in cases where an emms may be missing.
+  if (MVT::isVector(St->getValue().getValueType()) && 
+      MVT::getSizeInBits(St->getValue().getValueType()) == 64 &&
+      // Must be a store of a load.
+      isa<LoadSDNode>(St->getChain()) &&
+      St->getChain().Val == St->getValue().Val && 
+      St->getValue().hasOneUse() && St->getChain().hasOneUse() &&
+      !St->isVolatile() && !cast<LoadSDNode>(St->getChain())->isVolatile()) {
+    LoadSDNode *Ld = cast<LoadSDNode>(St->getChain());
+    
+    // If we are a 64-bit capable x86, lower to a single movq load/store pair.
+    if (Subtarget->is64Bit()) {
+      SDOperand NewLd = DAG.getLoad(MVT::i64, Ld->getChain(), Ld->getBasePtr(),
+                                    Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                                    Ld->isVolatile(), Ld->getAlignment());
+      return DAG.getStore(NewLd.getValue(1), NewLd, St->getBasePtr(),
+                          St->getSrcValue(), St->getSrcValueOffset(),
+                          St->isVolatile(), St->getAlignment());
+    }
+    
+    // TODO: 2 32-bit copies.
+  }
+  return SDOperand();
+}
+
 /// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
 /// X86ISD::FXOR nodes.
 static SDOperand PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
@@ -5908,6 +5938,8 @@
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, Subtarget);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
+  case ISD::STORE:          
+      return PerformSTORECombine(cast<StoreSDNode>(N), DAG, Subtarget);
   case X86ISD::FXOR:
   case X86ISD::FOR:         return PerformFORCombine(N, DAG);
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);

Added: llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll?rev=47470&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll (added)
+++ llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll Thu Feb 21 20:09:43 2008
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
+
+; This test should use GPRs to copy the mmx value, not MMX regs.  Using mmx regs,
+; increases the places that need to use emms.
+
+; rdar://5741668
+target triple = "x86_64-apple-darwin8"
+
+define i32 @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
+entry:
+	%tmp1 = load <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
+	store <1 x i64> %tmp1, <1 x i64>* %x, align 8
+	ret i32 undef
+}





More information about the llvm-commits mailing list