[llvm-commits] [llvm] r62664 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/Target/CellSPU/README.txt lib/Target/CellSPU/SPU128InstrInfo.td lib/Target/CellSPU/SPU64InstrInfo.td lib/Target/CellSPU/SPUISelDAGToDAG.cpp lib/Target/CellSPU/SPUISelLowering.cpp lib/Target/CellSPU/SPUISelLowering.h lib/Target/CellSPU/SPUInstrInfo.cpp lib/Target/CellSPU/SPUInstrInfo.td test/CodeGen/CellSPU/fneg-fabs.ll

Tue Jan 20 20:58:48 PST 2009

Author: pingbak
Date: Tue Jan 20 22:58:48 2009
New Revision: 62664

URL: http://llvm.org/viewvc/llvm-project?rev=62664&view=rev
Log:
CellSPU:
- Ensure that (operation) legalization emits proper FDIV libcall when needed.
- Fix various bugs encountered during llvm-spu-gcc build, along with various
  cleanups.
- Start supporting double precision comparisons for remaining libgcc2 build.
  Discovered interesting DAGCombiner feature, which is currently solved via
  custom lowering (64-bit constants are not legal on CellSPU, but DAGCombiner
  insists on inserting one anyway.)
- Update README.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/lib/Target/CellSPU/README.txt
    llvm/trunk/lib/Target/CellSPU/SPU128InstrInfo.td
    llvm/trunk/lib/Target/CellSPU/SPU64InstrInfo.td
    llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
    llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
    llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
    llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
    llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
    llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue Jan 20 22:58:48 2009
@@ -3294,6 +3294,10 @@
         LC = GetFPLibCall(VT, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80,
                           RTLIB::POW_PPCF128);
         break;
+      case ISD::FDIV:
+        LC = GetFPLibCall(VT, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80,
+                          RTLIB::DIV_PPCF128);
+        break;
       default: break;
       }
       if (LC != RTLIB::UNKNOWN_LIBCALL) {

Modified: llvm/trunk/lib/Target/CellSPU/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/README.txt?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/README.txt (original)
+++ llvm/trunk/lib/Target/CellSPU/README.txt Tue Jan 20 22:58:48 2009
@@ -8,7 +8,7 @@
 - Mark Thomas (floating point instructions)
 - Michael AuYeung (intrinsics)
 - Chandler Carruth (LLVM expertise)
-- Nehal Desai (debugging, RoadRunner SPU expertise)
+- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -36,7 +36,7 @@
 
 TODO:
 * Create a machine pass for performing dual-pipeline scheduling specifically
-  for CellSPU, handle inserting branch prediction instructions.
+  for CellSPU, and insert branch prediction instructions as needed.
 
 * i32 instructions:
 
@@ -48,20 +48,43 @@
   * sign and zero extension: done
   * addition: done
   * subtraction: needed
-  * multiplication: work-in-progress
+  * multiplication: done
 
 * i128 support:
 
-  * zero extension: done
+  * zero extension, any extension: done
   * sign extension: needed
   * arithmetic operators (add, sub, mul, div): needed
+  * logical operations (and, or, shl, srl, sra, xor, nor, nand): needed
 
-* Double floating point support
+    * or: done
 
-  This was started. "What's missing?" to be filled in.
+* f64 support
+
+  * Comparison operators:
+    SETOEQ              unimplemented
+    SETOGT              unimplemented
+    SETOGE              unimplemented
+    SETOLT              unimplemented
+    SETOLE              unimplemented
+    SETONE              unimplemented
+    SETO                done (lowered)
+    SETUO               done (lowered)
+    SETUEQ              unimplemented
+    SETUGT              unimplemented
+    SETUGE              unimplemented
+    SETULT              unimplemented
+    SETULE              unimplemented
+    SETUNE              unimplemented
+
+* LLVM vector support
+
+  * VSETCC needs to be implemented. It should be straightforward to code,
+    but has not been written yet.
 
 * Intrinsics
 
-  Lots of progress. "What's missing/incomplete?" to be filled in.
+  * spu.h intrinsics added but not tested. Need to have an operational
+    llvm-spu-gcc in order to write a unit test harness.
 
 ===-------------------------------------------------------------------------===

Modified: llvm/trunk/lib/Target/CellSPU/SPU128InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPU128InstrInfo.td?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPU128InstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPU128InstrInfo.td Tue Jan 20 22:58:48 2009
@@ -2,7 +2,6 @@
 //
 //                     Cell SPU 128-bit operations
 //
-// Primary author: Scott Michel (scottm at aero.org)
 //===----------------------------------------------------------------------===//
                                   
 // zext 32->128: Zero extend 32-bit to 128-bit
@@ -20,3 +19,23 @@
 // zext 8->128: Zero extend 8-bit to 128-bit
 def : Pat<(i128 (zext R8C:$rSrc)),
           (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+          (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+          (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+          (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;

Modified: llvm/trunk/lib/Target/CellSPU/SPU64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPU64InstrInfo.td?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPU64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPU64InstrInfo.td Tue Jan 20 22:58:48 2009
@@ -33,6 +33,13 @@
    SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
             [/* no pattern */]>;
 
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
 // select the negative condition:
 class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
   Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
@@ -43,13 +50,6 @@
   Pat<(cond R64C:$rA, R64C:$rB),
       (XORIr32 compare.Fragment, -1)>;
 
-// The generic i64 select pattern, which assumes that the comparison result
-// is in a 32-bit register that contains a select mask pattern (i.e., gather
-// bits result):
-
-def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
-          (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
-
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // The i64 seteq fragment that does the scalar->vector conversion and
 // comparison:
@@ -331,8 +331,8 @@
                   (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                              v2i64_mul_ashlq4<rA>.Fragment),
                   (Av4i32
-                    (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
-                               v2i64_mul_bhi64<rB>.Fragment),
+                      (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+                                 v2i64_mul_bhi64<rB>.Fragment),
                     (Av4i32
                       (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                  v2i64_mul_bhi64<rB>.Fragment),
@@ -381,3 +381,14 @@
                     (v4i32 VECREG:$rCGmask)),
           v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for f64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+   SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+            [(set R64FP:$rT,
+                  (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;

Modified: llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Tue Jan 20 22:58:48 2009
@@ -685,26 +685,26 @@
       break;
     case MVT::i32:
       shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x00010203, MVT::i32),
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x08090a0b, MVT::i32));
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x00010203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x08090a0b, MVT::i32));
       break;
 
     case MVT::i16:
       shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x80800203, MVT::i32),
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x80800a0b, MVT::i32));
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800203, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80800a0b, MVT::i32));
       break;
 
     case MVT::i8:
       shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x80808003, MVT::i32),
-                             CurDAG->getConstant(0x80808080, MVT::i32),
-                             CurDAG->getConstant(0x8080800b, MVT::i32));
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x80808003, MVT::i32),
+                                 CurDAG->getConstant(0x80808080, MVT::i32),
+                                 CurDAG->getConstant(0x8080800b, MVT::i32));
       break;
     }
 
@@ -714,9 +714,9 @@
 
     SDValue zextShuffle =
             CurDAG->getNode(SPUISD::SHUFB, OpVecVT,
-                                       SDValue(PromoteScalar, 0),
-                                       SDValue(PromoteScalar, 0),
-                                       SDValue(shufMaskLoad, 0));
+                            SDValue(PromoteScalar, 0),
+                            SDValue(PromoteScalar, 0),
+                            SDValue(shufMaskLoad, 0));
 
     // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
     // re-use it in the VEC2PREFSLOT selection without needing to explicitly
@@ -745,6 +745,27 @@
     return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
                                       Op.getOperand(0), Op.getOperand(1),
                                       SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+            emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+    return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+            emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG));
+
+    return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
+  } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+    SDNode *CGLoad =
+            emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG));
+
+    return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, OpVT,
+                                      Op.getOperand(0), Op.getOperand(1),
+                                      SDValue(CGLoad, 0)));
   } else if (Opc == ISD::SHL) {
     if (OpVT == MVT::i64) {
       return SelectSHLi64(Op, OpVT);

Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Tue Jan 20 22:58:48 2009
@@ -92,6 +92,9 @@
   setUseUnderscoreSetJmp(true);
   setUseUnderscoreLongJmp(true);
 
+  // Set RTLIB libcall names as used by SPU:
+  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
   // Set up the SPU's register classes:
   addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
   addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
@@ -183,6 +186,9 @@
   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 
+  // Make sure that DAGCombine doesn't insert illegal 64-bit constants
+  setOperationAction(ISD::FABS,  MVT::f64, Custom);
+
   // SPU can do rotate right and left, so legalize it... but customize for i8
   // because instructions don't exist.
 
@@ -243,6 +249,7 @@
   setOperationAction(ISD::SETCC, MVT::i16,   Legal);
   setOperationAction(ISD::SETCC, MVT::i32,   Legal);
   setOperationAction(ISD::SETCC, MVT::i64,   Legal);
+  setOperationAction(ISD::SETCC, MVT::f64,   Custom);
 
   // Custom lower i128 -> i64 truncates
   setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
@@ -410,6 +417,9 @@
     node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
     node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
     node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
+    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
+            "SPUISD::ROTBYTES_LEFT_BITS";
     node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
     node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
     node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
@@ -1552,12 +1562,9 @@
   return false;  // Can't be a splat if two pieces don't match.
 }
 
-// If this is a case we can't handle, return null and let the default
-// expansion code take care of it.  If we CAN select this case, and if it
-// selects to a single instruction, return Op.  Otherwise, if we can codegen
-// this case more efficiently than a constant pool load, lower it to the
-// sequence of ops that should be used.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+//! Lower a BUILD_VECTOR instruction creatively:
+SDValue
+SPU::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
   // If this is a vector of constants or undefs, get the bits.  A bit in
   // UndefBits is set if the corresponding element of the vector is an
@@ -1575,6 +1582,11 @@
 
   switch (VT.getSimpleVT()) {
   default:
+    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
+         << VT.getMVTString()
+         << "\n";
+    abort();
+    /*NOTREACHED*/
   case MVT::v4f32: {
     uint32_t Value32 = SplatBits;
     assert(SplatSize == 4
@@ -2188,32 +2200,32 @@
 
 //! Generate the carry-generate shuffle mask.
 SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG) {
-SmallVector<SDValue, 16> ShufBytes;
+  SmallVector<SDValue, 16 > ShufBytes;
 
-// Create the shuffle mask for "rotating" the borrow up one register slot
-// once the borrow is generated.
-ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  // Create the shuffle mask for "rotating" the carry up one register slot
+  // once the carry is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
 
-return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                   &ShufBytes[0], ShufBytes.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
 }
 
 //! Generate the borrow-generate shuffle mask
 SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG) {
-SmallVector<SDValue, 16> ShufBytes;
+  SmallVector<SDValue, 16 > ShufBytes;
 
-// Create the shuffle mask for "rotating" the borrow up one register slot
-// once the borrow is generated.
-ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
-ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  // Create the shuffle mask for "rotating" the borrow up one register slot
+  // once the borrow is generated.
+  ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+  ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
 
-return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
-                   &ShufBytes[0], ShufBytes.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+                     &ShufBytes[0], ShufBytes.size());
 }
 
 //! Lower byte immediate operations for v16i8 vectors:
@@ -2372,6 +2384,83 @@
   return SDValue();
 }
 
+//! Lower ISD::FABS
+/*!
+ DAGCombine does the same basic reduction: convert the double to i64 and mask
+ off the sign bit. Unfortunately, DAGCombine inserts the i64 constant, which
+ CellSPU has to legalize. Hence, the custom lowering.
+ */
+
+static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) {
+  MVT OpVT = Op.getValueType();
+  MVT IntVT(MVT::i64);
+  SDValue Op0 = Op.getOperand(0);
+
+  assert(OpVT == MVT::f64 && "LowerFABS: expecting MVT::f64!\n");
+
+  SDValue iABS =
+          DAG.getNode(ISD::AND, IntVT,
+                      DAG.getNode(ISD::BIT_CONVERT, IntVT, Op0),
+                      DAG.getConstant(~IntVT.getIntegerVTSignBit(), IntVT));
+
+  return DAG.getNode(ISD::BIT_CONVERT, MVT::f64, iABS);
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+                          const TargetLowering &TLI) {
+  SDValue lhs = Op.getOperand(0);
+  SDValue rhs = Op.getOperand(1);
+  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode > (Op.getOperand(2));
+  MVT lhsVT = lhs.getValueType();
+  SDValue posNaN = DAG.getConstant(0x7ff0000000000001ULL, MVT::i64);
+
+  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
+
+  switch (CC->get()) {
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+    abort();
+    break;
+  case ISD::SETO: {
+    SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
+    SDValue i64lhs =
+            DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+
+    return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETLT);
+  }
+  case ISD::SETUO: {
+    SDValue lhsfabs = DAG.getNode(ISD::FABS, MVT::f64, lhs);
+    SDValue i64lhs =
+            DAG.getNode(ISD::BIT_CONVERT, MVT::i64, lhsfabs);
+
+    return DAG.getSetCC(MVT::i32, i64lhs, posNaN, ISD::SETGE);
+  }
+  case ISD::SETUEQ:
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUNE:
+  default:
+    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
+    abort();
+    break;
+  }
+
+  return SDValue();
+}
+
 //! Lower ISD::SELECT_CC
 /*!
   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2501,9 +2590,12 @@
     break;
   }
 
+  case ISD::FABS:
+    return LowerFABS(Op, DAG);
+
   // Vector-related lowering.
   case ISD::BUILD_VECTOR:
-    return LowerBUILD_VECTOR(Op, DAG);
+    return SPU::LowerBUILD_VECTOR(Op, DAG);
   case ISD::SCALAR_TO_VECTOR:
     return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
@@ -2530,6 +2622,9 @@
   case ISD::SELECT_CC:
     return LowerSELECT_CC(Op, DAG, *this);
 
+  case ISD::SETCC:
+    return LowerSETCC(Op, DAG, *this);
+
   case ISD::TRUNCATE:
     return LowerTRUNCATE(Op, DAG);
   }
@@ -2656,8 +2751,8 @@
   }
   case SPUISD::IndirectAddr: {
     if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
-      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
-      if (CN->getZExtValue() == 0) {
+      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+      if (CN != 0 && CN->getZExtValue() == 0) {
         // (SPUindirect (SPUaform <addr>, 0), 0) ->
         // (SPUaform <addr>, 0)
 
@@ -2736,7 +2831,7 @@
     break;
   }
   }
-  
+
   // Otherwise, return unchanged.
 #ifndef NDEBUG
   if (Result.getNode()) {
@@ -2809,41 +2904,18 @@
                                                   unsigned Depth ) const {
 #if 0
   const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
-#endif
 
   switch (Op.getOpcode()) {
   default:
     // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
     break;
-
-#if 0
   case CALL:
   case SHUFB:
   case SHUFFLE_MASK:
   case CNTB:
-#endif
-
-  case SPUISD::PREFSLOT2VEC: {
-    SDValue Op0 = Op.getOperand(0);
-    MVT Op0VT = Op0.getValueType();
-    unsigned Op0VTBits = Op0VT.getSizeInBits();
-    uint64_t InMask = Op0VT.getIntegerVTBitMask();
-    KnownZero |= APInt(Op0VTBits, ~InMask, false);
-    KnownOne |= APInt(Op0VTBits, InMask, false);
-    break;
-  }
-
+  case SPUISD::PREFSLOT2VEC:
   case SPUISD::LDRESULT:
-  case SPUISD::VEC2PREFSLOT: {
-    MVT OpVT = Op.getValueType();
-    unsigned OpVTBits = OpVT.getSizeInBits();
-    uint64_t InMask = OpVT.getIntegerVTBitMask();
-    KnownZero |= APInt(OpVTBits, ~InMask, false);
-    KnownOne |= APInt(OpVTBits, InMask, false);
-    break;
-  }
-
-#if 0
+  case SPUISD::VEC2PREFSLOT:
   case SPUISD::SHLQUAD_L_BITS:
   case SPUISD::SHLQUAD_L_BYTES:
   case SPUISD::VEC_SHL:
@@ -2854,8 +2926,8 @@
   case SPUISD::ROTBYTES_LEFT:
   case SPUISD::SELECT_MASK:
   case SPUISD::SELB:
-#endif
   }
+#endif
 }
 
 unsigned

Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h Tue Jan 20 22:58:48 2009
@@ -61,7 +61,7 @@
     };
   }
 
-  /// Predicates that are used for node matching:
+  //! Utility functions specific to CellSPU:
   namespace SPU {
     SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT ValueType);
@@ -78,6 +78,7 @@
 
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                               const SPUTargetMachine &TM);
+    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
 
     SDValue getBorrowGenerateShufMask(SelectionDAG &DAG);
     SDValue getCarryGenerateShufMask(SelectionDAG &DAG);

Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Tue Jan 20 22:58:48 2009
@@ -134,6 +134,7 @@
   case SPU::ORi64_v2i64:
   case SPU::ORf32_v4f32:
   case SPU::ORf64_v2f64:
+/*
   case SPU::ORi128_r64:
   case SPU::ORi128_f64:
   case SPU::ORi128_r32:
@@ -148,6 +149,8 @@
   case SPU::ORr16_i128:
   case SPU::ORr8_i128:
   case SPU::ORvec_i128:
+*/
+/*
   case SPU::ORr16_r32:
   case SPU::ORr8_r32:
   case SPU::ORr32_r16:
@@ -158,7 +161,11 @@
   case SPU::ORr64_r32:
   case SPU::ORr64_r16:
   case SPU::ORr64_r8:
-  {
+*/
+  case SPU::ORf32_r32:
+  case SPU::ORr32_f32:
+  case SPU::ORf64_r64:
+  case SPU::ORr64_f64: {
     assert(MI.getNumOperands() == 2 &&
            MI.getOperand(0).isReg() &&
            MI.getOperand(1).isReg() &&

Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Tue Jan 20 22:58:48 2009
@@ -1259,9 +1259,6 @@
   def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
                       [/* Intentionally does not match a pattern */]>;
 
-  def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
-                      [/* Intentionally does not match a pattern */]>;
-
   // Could use v4i32, but won't for clarity
   def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
                        [/* Intentionally does not match a pattern */]>;
@@ -1408,12 +1405,12 @@
 // These are effectively no-ops, but need to exist for proper type conversion
 // and type coercion.
 
-class ORCvtForm<dag OOL, dag IOL>
+class ORCvtForm<dag OOL, dag IOL, list<dag> pattern = [/* no pattern */]>
           : SPUInstr<OOL, IOL, "or\t$rT, $rA, $rA", IntegerOp> {
   bits<7> RA;
   bits<7> RT;
 
-  let Pattern = [/* no pattern */];
+  let Pattern = pattern;
 
   let Inst{0-10} = 0b10000010000;
   let Inst{11-17} = RA;
@@ -1427,29 +1424,29 @@
 class ORExtractElt<RegisterClass rclass>:
     ORCvtForm<(outs rclass:$rT), (ins VECREG:$rA)>;
 
-class ORCvtRegGPRC<RegisterClass rclass>:
-    ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>;
+/* class ORCvtRegGPRC<RegisterClass rclass>:
+    ORCvtForm<(outs GPRC:$rT), (ins rclass:$rA)>; */
 
-class ORCvtVecGPRC:
-    ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+/* class ORCvtVecGPRC:
+    ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; */
 
-class ORCvtGPRCReg<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
+/* class ORCvtGPRCReg<RegisterClass rclass>:
+    ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>; */
     
-class ORCvtFormR32Reg<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins R32C:$rA)>;
+class ORCvtFormR32Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+    ORCvtForm<(outs rclass:$rT), (ins R32C:$rA), pattern>;
     
-class ORCvtFormRegR32<RegisterClass rclass>:
-    ORCvtForm<(outs R32C:$rT), (ins rclass:$rA)>;
+class ORCvtFormRegR32<RegisterClass rclass, list<dag> pattern = [ ]>:
+    ORCvtForm<(outs R32C:$rT), (ins rclass:$rA), pattern>;
 
-class ORCvtFormR64Reg<RegisterClass rclass>:
-    ORCvtForm<(outs rclass:$rT), (ins R64C:$rA)>;
+class ORCvtFormR64Reg<RegisterClass rclass, list<dag> pattern = [ ]>:
+    ORCvtForm<(outs rclass:$rT), (ins R64C:$rA), pattern>;
     
-class ORCvtFormRegR64<RegisterClass rclass>:
-    ORCvtForm<(outs R64C:$rT), (ins rclass:$rA)>;
+class ORCvtFormRegR64<RegisterClass rclass, list<dag> pattern = [ ]>:
+    ORCvtForm<(outs R64C:$rT), (ins rclass:$rA), pattern>;
 
-class ORCvtGPRCVec:
-    ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
+/* class ORCvtGPRCVec:
+    ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>; */
 
 multiclass BitwiseOr
 {
@@ -1468,10 +1465,11 @@
                           (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
                                                  (v2i64 VECREG:$rB)))))]>;
 
-  def r64: ORRegInst<R64C>;
-  def r32: ORRegInst<R32C>;
-  def r16: ORRegInst<R16C>;
-  def r8:  ORRegInst<R8C>;
+  def r128: ORRegInst<GPRC>;
+  def r64:  ORRegInst<R64C>;
+  def r32:  ORRegInst<R32C>;
+  def r16:  ORRegInst<R16C>;
+  def r8:   ORRegInst<R8C>;
 
   // OR instructions used to copy f32 and f64 registers.
   def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
@@ -1496,6 +1494,7 @@
   def f32_v4f32: ORExtractElt<R32FP>;
   def f64_v2f64: ORExtractElt<R64FP>;
 
+/*
   // Conversion from GPRC to register
   def i128_r64:  ORCvtRegGPRC<R64C>;
   def i128_f64:  ORCvtRegGPRC<R64FP>;
@@ -1517,7 +1516,8 @@
 
   // Conversion from vector to GPRC
   def vec_i128:  ORCvtGPRCVec;
-  
+*/
+/*
   // Conversion from register to R32C:
   def r16_r32:   ORCvtFormRegR32<R16C>;
   def r8_r32:    ORCvtFormRegR32<R8C>;
@@ -1535,6 +1535,18 @@
   def r64_r32:   ORCvtFormRegR64<R32C>;
   def r64_r16:   ORCvtFormRegR64<R16C>;
   def r64_r8:    ORCvtFormRegR64<R8C>;
+*/
+
+  // bitconvert patterns:
+  def r32_f32:   ORCvtFormR32Reg<R32FP,
+                                 [(set R32FP:$rT, (bitconvert R32C:$rA))]>;
+  def f32_r32:   ORCvtFormRegR32<R32FP,
+                                 [(set R32C:$rT, (bitconvert R32FP:$rA))]>;
+
+  def r64_f64:   ORCvtFormR64Reg<R64FP,
+                                 [(set R64FP:$rT, (bitconvert R64C:$rA))]>;
+  def f64_r64:   ORCvtFormRegR64<R64FP,
+                                 [(set R64C:$rT, (bitconvert R64FP:$rA))]>;
 }
 
 defm OR : BitwiseOr;
@@ -1960,7 +1972,7 @@
 			       (v4f32 VECREG:$rB),
 			       (v4f32 VECREG:$rA)))]>;
 
-  // SELBr64_cond is defined further down, look for i64 comparisons
+  // SELBr64_cond is defined in SPU64InstrInfo.td
   def r32_cond:   SELBRegCondInst<R32C, R32C>;
   def f32_cond:   SELBRegCondInst<R32C, R32FP>;
   def r16_cond:   SELBRegCondInst<R16C, R16C>;
@@ -2146,14 +2158,6 @@
              [(set (vectype VECREG:$rT),
                    (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>;
 
-// $rB gets promoted to 32-bit register type when confronted with
-// this llvm assembly code:
-//
-// define i16 @shlh_i16_1(i16 %arg1, i16 %arg2) {
-//      %A = shl i16 %arg1, %arg2
-//      ret i16 %A
-// }
-
 multiclass ShiftLeftHalfword
 {
   def v8i16: SHLHVecInst<v8i16>;
@@ -2250,6 +2254,10 @@
                [(set (vectype VECREG:$rT),
                      (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
 
+class SHLQBIRegInst<RegisterClass rclass>:
+    SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+               [/* no pattern */]>;
+
 multiclass ShiftLeftQuadByBits
 {
   def v16i8: SHLQBIVecInst<v16i8>;
@@ -2258,6 +2266,8 @@
   def v4f32: SHLQBIVecInst<v4f32>;
   def v2i64: SHLQBIVecInst<v2i64>;
   def v2f64: SHLQBIVecInst<v2f64>;
+
+  def r128:  SHLQBIRegInst<GPRC>;
 }
 
 defm SHLQBI : ShiftLeftQuadByBits;
@@ -2335,6 +2345,32 @@
 
 defm SHLQBYI : ShiftLeftQuadBytesImm;
 
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+    RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+           RotateShift, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+    SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+                [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+    SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+                 [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+  def v16i8: SHLQBYBIVecInst<v16i8>;
+  def v8i16: SHLQBYBIVecInst<v8i16>;
+  def v4i32: SHLQBYBIVecInst<v4i32>;
+  def v4f32: SHLQBYBIVecInst<v4f32>;
+  def v2i64: SHLQBYBIVecInst<v2i64>;
+  def v2f64: SHLQBYBIVecInst<v2f64>;
+
+  def r128:  SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // Rotate halfword:
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -4285,13 +4321,6 @@
           (ANDfabsvec (v4f32 VECREG:$rA),
                       (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
 
-def : Pat<(fabs R64FP:$rA),
-          (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
-
-def : Pat<(fabs (v2f64 VECREG:$rA)),
-          (ANDfabsvec (v2f64 VECREG:$rA),
-                      (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
-
 //===----------------------------------------------------------------------===//
 // Hint for branch instructions:
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll?rev=62664&r1=62663&r2=62664&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll (original)
+++ llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll Tue Jan 20 22:58:48 2009
@@ -1,9 +1,9 @@
 ; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep fsmbi   %t1.s | count 3
+; RUN: grep fsmbi   %t1.s | count 2
 ; RUN: grep 32768   %t1.s | count 2
 ; RUN: grep xor     %t1.s | count 4
-; RUN: grep and     %t1.s | count 5
-; RUN: grep andbi   %t1.s | count 3
+; RUN: grep and     %t1.s | count 4
+; RUN: grep andbi   %t1.s | count 2
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"