[llvm-commits] [llvm] r46307 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/fp-stack-direct-ret.ll test/CodeGen/X86/fp-stack-ret-conv.ll test/CodeGen/X86/pr1505b.ll

Chris Lattner <sabre at nondot.org>
Thu Jan 24 00:07:49 PST 2008


Author: lattner
Date: Thu Jan 24 02:07:48 2008
New Revision: 46307

URL: http://llvm.org/viewvc/llvm-project?rev=46307&view=rev
Log:
Significantly simplify and improve handling of FP function results on x86-32.
In this case, the value is returned in ST(0) and then has to be converted to an
SSE register.  This causes significant codegen ugliness in some cases.  For
example, in the trivial fp-stack-direct-ret.ll testcase we used to generate:

_bar:
	subl	$28, %esp
	call	L_foo$stub
	fstpl	16(%esp)
	movsd	16(%esp), %xmm0
	movsd	%xmm0, 8(%esp)
	fldl	8(%esp)
	addl	$28, %esp
	ret

because we move the result of foo() into an XMM register, then have to
move it back onto the fp-stack for the return of bar.

Instead of hacking ever-more special cases into the call result lowering code,
we take a much simpler approach: on x86-32, an FP call result is modeled as
always being returned in an f80 register, which is then truncated to f32 or f64
as needed.  Similarly, returning an FP result is modeled as an extension to f80
followed by the return.
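
In DAG construction terms, this boils down to two small pieces.  Here is a
condensed sketch of the lowering code from the patch below (not the verbatim
sources; the surrounding setup is omitted):

	// Returning an SSE value: widen it to f80 before it goes out in ST(0).
	if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
	  Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);

	// Receiving a call result into an SSE register: take it off the FP
	// stack as f80, then narrow it with an explicit FP_ROUND node.  The
	// '1' operand marks the truncation as exact, letting the combiner
	// delete it when it pairs up with an FP_EXTEND.
	SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
	RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
	                     DAG.getIntPtrConstant(1));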

This exposes the truncations and extensions to the dag combiner, allowing
target-independent code to hack on them and eliminate them in this case.  This
gives us this code for the example above:

_bar:
	subl	$12, %esp
	call	L_foo$stub
	addl	$12, %esp
	ret
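
The elimination itself is just the standard dag combiner fold on the exposed
pair.  As a hedged sketch of what that fold looks like (it lives in the
target-independent DAGCombiner, not in this patch):

	// An exact round of an extend simply forwards the original value:
	//   (fp_round (fp_extend X)) -> X
	SDOperand N0 = N->getOperand(0);
	if (N0.getOpcode() == ISD::FP_EXTEND &&
	    N0.getOperand(0).getValueType() == N->getValueType(0))
	  return N0.getOperand(0);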

The nasty aspect of this is that these conversions are not legal, but we want
the second pass of the dag combiner (post-legalize) to be able to hack on them.
To handle this, we lie to legalize and say they are legal, then custom expand
them on entry to the isel pass (PreprocessForFPConvert).  This is gross, but
less gross than the code it replaces :)
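
Concretely, the late expansion bounces each offending fp_round/fp_extend
through a fresh stack slot as a trunc-store plus ext-load pair.  Distilled
from PreprocessForFPConvert in the patch below (type selection and iterator
bookkeeping omitted):

	// The FP stack side has native extload/truncstore, and SSE can fold
	// the resulting load into later operations.
	SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
	SDOperand Store  = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
	                                     MemTmp, NULL, 0, MemVT);
	SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
	                                  NULL, 0, MemVT);
	DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);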

This also allows us to generate better code in several other cases.  For
example, on fp-stack-ret-conv.ll we now generate:

_test:
	subl	$12, %esp
	call	L_foo$stub
	fstps	8(%esp)
	movl	16(%esp), %eax
	cvtss2sd	8(%esp), %xmm0
	movsd	%xmm0, (%eax)
	addl	$12, %esp
	ret

where before we produced (incidentally, the old bad code is identical to what
gcc produces):

_test:
	subl	$12, %esp
	call	L_foo$stub
	fstpl	(%esp)
	cvtsd2ss	(%esp), %xmm0
	cvtss2sd	%xmm0, %xmm0
	movl	16(%esp), %eax
	movsd	%xmm0, (%eax)
	addl	$12, %esp
	ret

Note that we generate slightly worse code on pr1505b.ll due to a scheduling 
deficiency that is unrelated to this patch.


Added:
    llvm/trunk/test/CodeGen/X86/fp-stack-direct-ret.ll
    llvm/trunk/test/CodeGen/X86/fp-stack-ret-conv.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/pr1505b.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=46307&r1=46306&r2=46307&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Jan 24 02:07:48 2008
@@ -156,7 +156,8 @@
     bool TryFoldLoad(SDOperand P, SDOperand N,
                      SDOperand &Base, SDOperand &Scale,
                      SDOperand &Index, SDOperand &Disp);
-    void InstructionSelectPreprocess(SelectionDAG &DAG);
+    void PreprocessForRMW(SelectionDAG &DAG);
+    void PreprocessForFPConvert(SelectionDAG &DAG);
 
     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     /// inline asm expressions.
@@ -350,9 +351,10 @@
                          Store.getOperand(2), Store.getOperand(3));
 }
 
-/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
-/// selector to pick more load-modify-store instructions. This is a common
-/// case:
+/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
+/// This is only run if not in -fast mode (aka -O0).
+/// This allows the instruction selector to pick more read-modify-write
+/// instructions. This is a common case:
 ///
 ///     [Load chain]
 ///         ^
@@ -389,7 +391,7 @@
 ///       \      /
 ///        \    /
 ///       [Store]
-void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
+void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
          E = DAG.allnodes_end(); I != E; ++I) {
     if (!ISD::isNON_TRUNCStore(I))
@@ -459,6 +461,66 @@
   }
 }
 
+
+/// PreprocessForFPConvert - Walk over the dag, lowering fpround and fpextend
+/// nodes that target the FP stack into store/load pairs via the stack.  This is a
+/// gross hack.  We would like to simply mark these as being illegal, but when
+/// we do that, legalize produces these when it expands calls, then expands
+/// these in the same legalize pass.  We would like dag combine to be able to
+/// hack on these between the call expansion and the node legalization.  As such
+/// this pass basically does "really late" legalization of these inline with the
+/// X86 isel pass.
+void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ) {
+    SDNode *N = I++;  // Advance iterator now to avoid invalidation issues.
+    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
+      continue;
+    
+    // If the source and destination are SSE registers, then this is a legal
+    // conversion that should not be lowered.
+    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
+    MVT::ValueType DstVT = N->getValueType(0);
+    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
+    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
+    if (SrcIsSSE && DstIsSSE)
+      continue;
+
+    // If this is an FPStack extension (but not a truncation), it is a noop.
+    if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
+      continue;
+    
+    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+    // FPStack has extload and truncstore.  SSE can fold direct loads into other
+    // operations.  Based on this, decide what we want to do.
+    MVT::ValueType MemVT;
+    if (N->getOpcode() == ISD::FP_ROUND)
+      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+    else
+      MemVT = SrcIsSSE ? SrcVT : DstVT;
+    
+    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);
+    
+    // FIXME: optimize the case where the src/dest is a load or store?
+    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
+                                        MemTmp, NULL, 0, MemVT);
+    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
+                                      NULL, 0, MemVT);
+
+    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+    // extload we created.  This will cause general havoc on the dag because
+    // anything below the conversion could be folded into other existing nodes.
+    // To avoid invalidating 'I', back it up to the convert node.
+    --I;
+    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);
+    
+    // Now that we did that, the node is dead.  Increment the iterator to the
+    // next node to process, then delete N.
+    ++I;
+    DAG.DeleteNode(N);
+  }  
+}
+
 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
 /// when it has created a SelectionDAG for us to codegen.
 void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
@@ -466,7 +528,10 @@
   MachineFunction::iterator FirstMBB = BB;
 
   if (!FastISel)
-    InstructionSelectPreprocess(DAG);
+    PreprocessForRMW(DAG);
+
+  // FIXME: This should only happen when not -fast.
+  PreprocessForFPConvert(DAG);
 
   // Codegen the basic block.
 #ifndef NDEBUG

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=46307&r1=46306&r2=46307&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jan 24 02:07:48 2008
@@ -47,6 +47,7 @@
   X86ScalarSSEf32 = Subtarget->hasSSE1();
   X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
   
+  bool Fast = false;
 
   RegInfo = TM.getRegisterInfo();
 
@@ -355,13 +356,15 @@
     addLegalFPImmediate(APFloat(+0.0)); // xorpd
     addLegalFPImmediate(APFloat(+0.0f)); // xorps
 
-    // Conversions to long double (in X87) go through memory.
-    setConvertAction(MVT::f32, MVT::f80, Expand);
-    setConvertAction(MVT::f64, MVT::f80, Expand);
-
-    // Conversions from long double (in X87) go through memory.
-    setConvertAction(MVT::f80, MVT::f32, Expand);
-    setConvertAction(MVT::f80, MVT::f64, Expand);
+    // Floating truncations from f80 and extensions to f80 go through memory.
+    // If optimizing, we lie about this though and handle it in
+    // PreprocessForFPConvert so that dagcombine2 can hack on these.
+    if (Fast) {
+      setConvertAction(MVT::f32, MVT::f80, Expand);
+      setConvertAction(MVT::f64, MVT::f80, Expand);
+      setConvertAction(MVT::f80, MVT::f32, Expand);
+      setConvertAction(MVT::f80, MVT::f64, Expand);
+    }
   } else if (X86ScalarSSEf32) {
     // Use SSE for f32, x87 for f64.
     // Set up the FP register classes.
@@ -395,15 +398,17 @@
     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
 
-    // SSE->x87 conversions go through memory.
-    setConvertAction(MVT::f32, MVT::f64, Expand);
-    setConvertAction(MVT::f32, MVT::f80, Expand);
-
-    // x87->SSE truncations need to go through memory.
-    setConvertAction(MVT::f80, MVT::f32, Expand);    
-    setConvertAction(MVT::f64, MVT::f32, Expand);
-    // And x87->x87 truncations also.
-    setConvertAction(MVT::f80, MVT::f64, Expand);
+    // SSE <-> X87 conversions go through memory.  If optimizing, we lie about
+    // this though and handle it in PreprocessForFPConvert so that
+    // dagcombine2 can hack on these.
+    if (Fast) {
+      setConvertAction(MVT::f32, MVT::f64, Expand);
+      setConvertAction(MVT::f32, MVT::f80, Expand);
+      setConvertAction(MVT::f80, MVT::f32, Expand);    
+      setConvertAction(MVT::f64, MVT::f32, Expand);
+      // And x87->x87 truncations also.
+      setConvertAction(MVT::f80, MVT::f64, Expand);
+    }
 
     if (!UnsafeFPMath) {
       setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
@@ -420,10 +425,14 @@
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
-    // Floating truncations need to go through memory.
-    setConvertAction(MVT::f80, MVT::f32, Expand);    
-    setConvertAction(MVT::f64, MVT::f32, Expand);
-    setConvertAction(MVT::f80, MVT::f64, Expand);
+    // Floating truncations go through memory.  If optimizing, we lie about
+    // this though and handle it in PreprocessForFPConvert so that
+    // dagcombine2 can hack on these.
+    if (Fast) {
+      setConvertAction(MVT::f80, MVT::f32, Expand);    
+      setConvertAction(MVT::f64, MVT::f32, Expand);
+      setConvertAction(MVT::f80, MVT::f64, Expand);
+    }
 
     if (!UnsafeFPMath) {
       setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
@@ -647,7 +656,7 @@
     }
 
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-    
+
     // Custom lower v2i64 and v2f64 selects.
     setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
     setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
@@ -808,30 +817,10 @@
     // a register.
     SDOperand Value = Op.getOperand(1);
     
-    // If this is an FP return with ScalarSSE, we need to move the value from
-    // an XMM register onto the fp-stack.
-    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
-      SDOperand MemLoc;
-        
-      // If this is a load into a scalarsse value, don't store the loaded value
-      // back to the stack, only to reload it: just replace the scalar-sse load.
-      if (ISD::isNON_EXTLoad(Value.Val) &&
-           Chain.reachesChainWithoutSideEffects(Value.getOperand(0))) {
-        Chain  = Value.getOperand(0);
-        MemLoc = Value.getOperand(1);
-      } else {
-        // Spill the value to memory and reload it into top of stack.
-        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
-        MachineFunction &MF = DAG.getMachineFunction();
-        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
-        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
-        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
-      }
-      SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other);
-      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
-      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
-      Chain = Value.getValue(1);
-    }
+    // an XMM register onto the fp-stack.  Do this with an FP_EXTEND to f80.
+    // This will get legalized into a load/store if it can't get optimized away.
+    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
+      Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
     
     SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
     SDOperand Ops[] = { Chain, Value };
@@ -876,87 +865,26 @@
     // Copies from the FP stack are special, as ST0 isn't a valid register
     // before the fp stackifier runs.
     
-    // Copy ST0 into an RFP register with FP_GET_RESULT.
-    SDVTList Tys = DAG.getVTList(RVLocs[0].getValVT(), MVT::Other, MVT::Flag);
+    // Copy ST0 into an RFP register with FP_GET_RESULT.  If this will end up
+    // in an SSE register, copy it out as F80 and do a truncate; otherwise use
+    // the specified value type.
+    MVT::ValueType GetResultTy = RVLocs[0].getValVT();
+    if (isScalarFPTypeInSSEReg(GetResultTy))
+      GetResultTy = MVT::f80;
+    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
+    
     SDOperand GROps[] = { Chain, InFlag };
     SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
     Chain  = RetVal.getValue(1);
     InFlag = RetVal.getValue(2);
-    
-    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
-    // an XMM register.
-    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
-      SDOperand StoreLoc;
-      const Value *SrcVal = 0;
-      int SrcValOffset = 0;
-      MVT::ValueType RetStoreVT = RVLocs[0].getValVT();
-      
-      // Determine where to store the value.  If the call result is directly
-      // used by a store, see if we can store directly into the location.  In
-      // this case, we'll end up producing a fst + movss[load] + movss[store] to
-      // the same location, and the two movss's will be nuked as dead.  This
-      // optimizes common things like "*D = atof(..)" to not need an
-      // intermediate stack slot.
-      if (SDOperand(TheCall, 0).hasOneUse() && 
-          SDOperand(TheCall, 1).hasOneUse()) {
-        // In addition to direct uses, we also support a FP_ROUND that uses the
-        // value, if it is directly stored somewhere.
-        SDNode *User = *TheCall->use_begin();
-        if (User->getOpcode() == ISD::FP_ROUND && User->hasOneUse())
-          User = *User->use_begin();
-        
-        // Ok, we have one use of the value and one use of the chain.  See if
-        // they are the same node: a store.
-        if (StoreSDNode *N = dyn_cast<StoreSDNode>(User)) {
-          // Verify that the value being stored is either the call or a
-          // truncation of the call.
-          SDNode *StoreVal = N->getValue().Val;
-          if (StoreVal == TheCall)
-            ; // ok.
-          else if (StoreVal->getOpcode() == ISD::FP_ROUND &&
-                   StoreVal->hasOneUse() && 
-                   StoreVal->getOperand(0).Val == TheCall)
-            ; // ok.
-          else
-            N = 0;  // not ok.
-            
-          if (N && N->getChain().Val == TheCall &&
-              !N->isVolatile() && !N->isTruncatingStore() && 
-              N->getAddressingMode() == ISD::UNINDEXED) {
-            StoreLoc = N->getBasePtr();
-            SrcVal = N->getSrcValue();
-            SrcValOffset = N->getSrcValueOffset();
-            RetStoreVT = N->getValue().getValueType();
-          }
-        }
-      }
 
-      // If we weren't able to optimize the result, just create a temporary
-      // stack slot.
-      if (StoreLoc.Val == 0) {
-        MachineFunction &MF = DAG.getMachineFunction();
-        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
-        StoreLoc = DAG.getFrameIndex(SSFI, getPointerTy());
-      }
-      
-      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
-      // shouldn't be necessary except that RFP cannot be live across
-      // multiple blocks (which could happen if a select gets lowered into
-      // multiple blocks and scheduled in between them). When stackifier is
-      // fixed, they can be uncoupled.
-      SDOperand Ops[] = {
-        Chain, RetVal, StoreLoc, DAG.getValueType(RetStoreVT), InFlag
-      };
-      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
-      RetVal = DAG.getLoad(RetStoreVT, Chain,
-                           StoreLoc, SrcVal, SrcValOffset);
-      Chain = RetVal.getValue(1);
-      
-      // If we optimized a truncate, then extend the result back to its desired
-      // type.
-      if (RVLocs[0].getValVT() != RetStoreVT)
-        RetVal = DAG.getNode(ISD::FP_EXTEND, RVLocs[0].getValVT(), RetVal);
-    }
+    // If we want the result in an SSE register, use an FP_ROUND to get it
+    // there.
+    if (GetResultTy != RVLocs[0].getValVT())
+      RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
+                           // This truncation won't change the value.
+                           DAG.getIntPtrConstant(1));
+    
     ResultVals.push_back(RetVal);
   }
   

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=46307&r1=46306&r2=46307&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Jan 24 02:07:48 2008
@@ -2734,6 +2734,14 @@
 def : Pat<(v4i32 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 def : Pat<(v2i64 (undef)), (IMPLICIT_DEF_VR128)>, Requires<[HasSSE2]>;
 
+// extload f32 -> f64.  This matches load+fextend because we have a hack in 
+// the isel (PreprocessForFPConvert) that can introduce loads after dag combine.
+// Since these loads aren't folded into the fextend, we have to match it
+// explicitly here.
+let Predicates = [HasSSE2] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+           (CVTSS2SDrm addr:$src)>;
+
 // Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
 // 16-bits matter.
 def : Pat<(v8i16 (X86s2vec GR32:$src)), (MOVDI2PDIrr GR32:$src)>,

Added: llvm/trunk/test/CodeGen/X86/fp-stack-direct-ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-direct-ret.ll?rev=46307&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-stack-direct-ret.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fp-stack-direct-ret.ll Thu Jan 24 02:07:48 2008
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep fstp
+; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | not grep movsd
+
+declare double @foo()
+
+define double @bar() {
+entry:
+	%tmp5 = tail call double @foo()
+	ret double %tmp5
+}
+

Added: llvm/trunk/test/CodeGen/X86/fp-stack-ret-conv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-ret-conv.ll?rev=46307&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-stack-ret-conv.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fp-stack-ret-conv.ll Thu Jan 24 02:07:48 2008
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -mcpu=yonah | grep cvtss2sd
+; RUN: llvm-as < %s | llc -mcpu=yonah | grep fstps
+; RUN: llvm-as < %s | llc -mcpu=yonah | not grep cvtsd2ss
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define void @test(double *%b) {
+entry:
+	%tmp13 = tail call double @foo()
+	%tmp1314 = fptrunc double %tmp13 to float		; <float> [#uses=1]
+	%tmp3940 = fpext float %tmp1314 to double		; <double> [#uses=1]
+	volatile store double %tmp3940, double* %b
+	ret void
+}
+
+declare double @foo()

Modified: llvm/trunk/test/CodeGen/X86/pr1505b.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr1505b.ll?rev=46307&r1=46306&r2=46307&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr1505b.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr1505b.ll Thu Jan 24 02:07:48 2008
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 3
+; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
 ; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
 ; PR1505
 




