[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Mon Apr 17 20:24:43 PDT 2006

Changes in directory llvm/lib/Target/PowerPC:

PPCISelLowering.cpp updated: 1.160 -> 1.161
---
Log message:

Custom lower v4i32 multiplies into a cute sequence, instead of having legalize
scalarize the multiply into 4 mullw's and a bunch of load/store traffic.

This speeds up v4i32 multiplies 4.1x (measured) on a G5.  This implements
PowerPC/vec_mul.ll


---
Diffs of the changes:  (+53 -10)

 PPCISelLowering.cpp |   63 +++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 53 insertions(+), 10 deletions(-)


Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.160 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.161

--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.160	Mon Apr 17 13:09:22 2006
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp	Mon Apr 17 22:24:30 2006
@@ -227,6 +227,7 @@
     addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
     
     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
@@ -1062,14 +1063,27 @@
   return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
 }
 
-/// BuildIntrinsicBinOp - Return a binary operator intrinsic node with the
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
 /// specified intrinsic ID.
-static SDOperand BuildIntrinsicBinOp(unsigned IID, SDOperand LHS, SDOperand RHS,
-                                     SelectionDAG &DAG) {
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, LHS.getValueType(),
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
+                                  SelectionDAG &DAG, 
+                                  MVT::ValueType DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = LHS.getValueType();  // default: LHS type
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,  // ops: IID, LHS, RHS
                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
 }
 
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.  A DestVT of MVT::Other means "use Op0's type".
+static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
+                                  SDOperand Op2, SelectionDAG &DAG, 
+                                  MVT::ValueType DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,  // ops: IID, Op0..Op2
+                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
 /// amount.  The result has the specified value type.
 static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
@@ -1145,8 +1159,8 @@
       SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
       
       // Make the VSLW intrinsic, computing 0x8000_0000.
-      SDOperand Res = BuildIntrinsicBinOp(Intrinsic::ppc_altivec_vslw, OnesV, 
-                                          OnesV, DAG);
+      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 
+                                       OnesV, DAG);
       
       // xor by OnesV to invert it.
       Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
@@ -1175,7 +1189,7 @@
           Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
           Intrinsic::ppc_altivec_vslw
         };
-        return BuildIntrinsicBinOp(IIDs[SplatSize-1], Op, Op, DAG);
+        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
       }
       
       // vsplti + srl self.
@@ -1185,7 +1199,7 @@
           Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
           Intrinsic::ppc_altivec_vsrw
         };
-        return BuildIntrinsicBinOp(IIDs[SplatSize-1], Op, Op, DAG);
+        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
       }
       
       // vsplti + sra self.
@@ -1195,7 +1209,7 @@
           Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
           Intrinsic::ppc_altivec_vsraw
         };
-        return BuildIntrinsicBinOp(IIDs[SplatSize-1], Op, Op, DAG);
+        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
       }
       
       // vsplti + rol self.
@@ -1206,7 +1220,7 @@
           Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
           Intrinsic::ppc_altivec_vrlw
         };
-        return BuildIntrinsicBinOp(IIDs[SplatSize-1], Op, Op, DAG);
+        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
       }
 
       // t = vsplti c, result = vsldoi t, t, 1
@@ -1558,6 +1572,34 @@
   return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
 }
 
+static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::v4i32 && "Unknown mul to lower!");
+  SDOperand LHS = Op.getOperand(0);
+  SDOperand RHS = Op.getOperand(1);
+  
+  SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);  // 3rd vmsumuhm op.
+  SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG);  // +16 as shift amt.
+  
+  SDOperand RHSSwap =   // = vrlw RHS, 16
+    BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);
+  
+  // Bitcast the inputs to v8i16 for the halfword multiply intrinsics below.
+  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
+  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
+  RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);
+  
+  // Low parts multiplied together, generating 32-bit results (we ignore the top
+  // parts here; they are handled by the HiProd computation below).
+  SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+                                      LHS, RHS, DAG, MVT::v4i32);
+  
+  SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+                                      LHS, RHSSwap, Zero, DAG, MVT::v4i32);
+  // Shift the high parts (the LHS x RHSSwap sums) up 16 bits.
+  HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
+  return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);  // lo + (hi << 16)
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@@ -1583,6 +1625,7 @@
   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::MUL:                return LowerMUL(Op, DAG);
   }
   return SDOperand();
 }