[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelLowering.cpp X86ISelLowering.h X86InstrSSE.td

Thu Jan 4 23:56:24 PST 2007

Changes in directory llvm/lib/Target/X86:

X86ISelLowering.cpp updated: 1.310 -> 1.311
X86ISelLowering.h updated: 1.81 -> 1.82
X86InstrSSE.td updated: 1.174 -> 1.175
---
Log message:

With SSE2, expand FCOPYSIGN to a series of SSE bitwise operations.

---
Diffs of the changes:  (+93 -8)

 X86ISelLowering.cpp |   65 ++++++++++++++++++++++++++++++++++++++++++++++++----
 X86ISelLowering.h   |   11 ++++++++
 X86InstrSSE.td      |   25 ++++++++++++++++----
 3 files changed, 93 insertions(+), 8 deletions(-)


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.310 llvm/lib/Target/X86/X86ISelLowering.cpp:1.311

--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.310	Wed Jan  3 11:24:59 2007
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Fri Jan  5 01:55:56 2007
@@ -250,9 +250,6 @@
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
 
-  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
-
   if (X86ScalarSSE) {
     // Set up the FP register classes.
     addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -266,6 +263,10 @@
     setOperationAction(ISD::FNEG , MVT::f64, Custom);
     setOperationAction(ISD::FNEG , MVT::f32, Custom);
 
+    // Use ANDPD and ORPD to simulate FCOPYSIGN.
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
     // We don't support sin/cos/fmod
     setOperationAction(ISD::FSIN , MVT::f64, Expand);
     setOperationAction(ISD::FCOS , MVT::f64, Expand);
@@ -283,7 +284,9 @@
     // Set up the FP register classes.
     addRegisterClass(MVT::f64, X86::RFPRegisterClass);
 
-    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
+    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
     if (!UnsafeFPMath) {
       setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
@@ -4123,6 +4126,56 @@
   return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
 }
 
+SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
+  MVT::ValueType VT = Op.getValueType();
+  MVT::ValueType SrcVT = Op.getOperand(1).getValueType();
+  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);
+  // First get the sign bit of second operand.
+  std::vector<Constant*> CV;
+  if (SrcVT == MVT::f64) {
+    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
+    CV.push_back(ConstantFP::get(SrcTy, 0.0));
+  } else {
+    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
+    CV.push_back(ConstantFP::get(SrcTy, 0.0));
+    CV.push_back(ConstantFP::get(SrcTy, 0.0));
+    CV.push_back(ConstantFP::get(SrcTy, 0.0));
+  }
+  Constant *CS = ConstantStruct::get(CV);
+  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
+  std::vector<MVT::ValueType> Tys;
+  Tys.push_back(VT);
+  Tys.push_back(MVT::Other);
+  SmallVector<SDOperand, 3> Ops;
+  Ops.push_back(DAG.getEntryNode());
+  Ops.push_back(CPIdx);
+  Ops.push_back(DAG.getSrcValue(NULL));
+  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
+  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op.getOperand(1), Mask);
+
+  // Shift sign bit right or left if the two operands have different types.
+  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
+    // Op0 is MVT::f32, Op1 is MVT::f64.
+    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
+    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
+                          DAG.getConstant(32, MVT::i32));
+    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
+    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
+                          DAG.getConstant(0, getPointerTy()));
+  } else if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
+    // Op0 is MVT::f64, Op1 is MVT::f32.
+    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, SignBit);
+    SignBit = DAG.getNode(X86ISD::FSHL, MVT::v4f32, SignBit,
+                          DAG.getConstant(32, MVT::i32));
+    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, SignBit);
+    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, SignBit,
+                          DAG.getConstant(0, getPointerTy()));
+  }
+
+  // Or the first operand with the sign bit.
+  return DAG.getNode(X86ISD::FOR, VT, Op.getOperand(0), SignBit);
+}
+
 SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                         SDOperand Chain) {
   assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
@@ -4955,6 +5008,7 @@
   case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
   case ISD::FABS:               return LowerFABS(Op, DAG);
   case ISD::FNEG:               return LowerFNEG(Op, DAG);
+  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
@@ -4976,7 +5030,10 @@
   case X86ISD::SHLD:               return "X86ISD::SHLD";
   case X86ISD::SHRD:               return "X86ISD::SHRD";
   case X86ISD::FAND:               return "X86ISD::FAND";
+  case X86ISD::FOR:                return "X86ISD::FOR";
   case X86ISD::FXOR:               return "X86ISD::FXOR";
+  case X86ISD::FSHL:               return "X86ISD::FSHL";
+  case X86ISD::FSRL:               return "X86ISD::FSRL";
   case X86ISD::FILD:               return "X86ISD::FILD";
   case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
   case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";


Index: llvm/lib/Target/X86/X86ISelLowering.h
diff -u llvm/lib/Target/X86/X86ISelLowering.h:1.81 llvm/lib/Target/X86/X86ISelLowering.h:1.82
--- llvm/lib/Target/X86/X86ISelLowering.h:1.81	Thu Nov 30 15:55:46 2006
+++ llvm/lib/Target/X86/X86ISelLowering.h	Fri Jan  5 01:55:56 2007
@@ -35,10 +35,20 @@
       /// to X86::ANDPS or X86::ANDPD.
       FAND,
 
+      /// FOR - Bitwise logical OR of floating point values. This corresponds
+      /// to X86::ORPS or X86::ORPD.
+      FOR,
+
       /// FXOR - Bitwise logical XOR of floating point values. This corresponds
       /// to X86::XORPS or X86::XORPD.
       FXOR,
 
+      /// FSHL, FSRL - Shift a floating point value (in SSE register) by n bits
+      /// while shifting in 0's. These correspond to X86::PSLLDQ or
+      /// X86::PSRLDQ.
+      FSHL,
+      FSRL,
+
       /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
       /// integer source in memory and FP reg result.  This corresponds to the
       /// X86::FILD*m instructions. It has three inputs (token chain, address,
@@ -389,6 +399,7 @@
     SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
+    SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG, SDOperand Chain);
     SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
     SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);


Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.174 llvm/lib/Target/X86/X86InstrSSE.td:1.175
--- llvm/lib/Target/X86/X86InstrSSE.td:1.174	Thu Dec 14 13:43:11 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Fri Jan  5 01:55:56 2007
@@ -18,14 +18,21 @@
 // SSE specific DAG Nodes.
 //===----------------------------------------------------------------------===//
 
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+                                            SDTCisFP<0>, SDTCisInt<2> ]>;
+
 def X86loadp   : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
 def X86loadu   : SDNode<"X86ISD::LOAD_UA",   SDTLoad, [SDNPHasChain]>;
 def X86fmin    : SDNode<"X86ISD::FMIN",      SDTFPBinOp>;
 def X86fmax    : SDNode<"X86ISD::FMAX",      SDTFPBinOp>;
 def X86fand    : SDNode<"X86ISD::FAND",      SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
+def X86for     : SDNode<"X86ISD::FOR",       SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
 def X86fxor    : SDNode<"X86ISD::FXOR",      SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
+def X86fshl    : SDNode<"X86ISD::FSHL",      SDTX86FPShiftOp>;
+def X86fsrl    : SDNode<"X86ISD::FSRL",      SDTX86FPShiftOp>;
 def X86comi    : SDNode<"X86ISD::COMI",      SDTX86CmpTest,
                         [SDNPHasChain, SDNPOutFlag]>;
 def X86ucomi   : SDNode<"X86ISD::UCOMI",     SDTX86CmpTest,
@@ -607,9 +614,11 @@
                   "andpd {$src2, $dst|$dst, $src2}",
                   [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
 def FsORPSrr  : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                  "orps {$src2, $dst|$dst, $src2}", []>;
+                  "orps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
 def FsORPDrr  : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                  "orpd {$src2, $dst|$dst, $src2}", []>;
+                  "orpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
 def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "xorps {$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
@@ -626,9 +635,13 @@
                   [(set FR64:$dst, (X86fand FR64:$src1,
                                     (X86loadpf64 addr:$src2)))]>;
 def FsORPSrm  : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                  "orps {$src2, $dst|$dst, $src2}", []>;
+                  "orps {$src2, $dst|$dst, $src2}",
+                  [(set FR32:$dst, (X86for FR32:$src1,
+                                    (X86loadpf32 addr:$src2)))]>;
 def FsORPDrm  : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                  "orpd {$src2, $dst|$dst, $src2}", []>;
+                  "orpd {$src2, $dst|$dst, $src2}",
+                  [(set FR64:$dst, (X86for FR64:$src1,
+                                    (X86loadpf64 addr:$src2)))]>;
 def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "xorps {$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (X86fxor FR32:$src1,
@@ -1364,6 +1377,10 @@
             (v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
   def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
             (v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+  def : Pat<(v4f32 (X86fshl VR128:$src1, i32immSExt8:$src2)),
+            (v4f32 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
+  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+            (v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
 }
 
 // Logical