[llvm-commits] [llvm] r67067 - in /llvm/trunk: lib/Target/CellSPU/SPUISelDAGToDAG.cpp lib/Target/CellSPU/SPUISelLowering.cpp lib/Target/CellSPU/SPUISelLowering.h lib/Target/CellSPU/SPUInstrInfo.cpp lib/Target/CellSPU/SPUInstrInfo.td test/CodeGen/CellSPU/2009-01-01-BrCond.ll test/CodeGen/CellSPU/fneg-fabs.ll
Scott Michel
scottm at aero.org
Mon Mar 16 18:15:46 PDT 2009
Author: pingbak
Date: Mon Mar 16 20:15:45 2009
New Revision: 67067
URL: http://llvm.org/viewvc/llvm-project?rev=67067&view=rev
Log:
CellSPU:
- Fix fabs, fneg for f32 and f64.
- Use BuildVectorSDNode::isConstantSplat, now that the functionality exists.
- Continue to improve i64 constant lowering. Lower certain special constants
to the constant pool when they correspond to the special mask values of the
SPU's shufb instruction. This avoids the overhead of performing a shuffle on
a zero-filled vector just to produce the special constant when a single
memory load suffices.
Modified:
llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
llvm/trunk/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Mon Mar 16 20:15:45 2009
@@ -200,182 +200,212 @@
return retval;
}
-}
-namespace {
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-//===--------------------------------------------------------------------===//
-/// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
-/// instructions for SelectionDAG operations.
-///
-class SPUDAGToDAGISel :
- public SelectionDAGISel
-{
- SPUTargetMachine &TM;
- SPUTargetLowering &SPUtli;
- unsigned GlobalBaseReg;
-
-public:
- explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
- SelectionDAGISel(tm),
- TM(tm),
- SPUtli(*tm.getTargetLowering())
- { }
-
- virtual bool runOnFunction(Function &Fn) {
- // Make sure we re-emit a set of the global base reg if necessary
- GlobalBaseReg = 0;
- SelectionDAGISel::runOnFunction(Fn);
- return true;
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
}
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint32_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- /// getI64Imm - Return a target constant with the specified value, of type
- /// i64.
- inline SDValue getI64Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
- }
-
- /// getSmallIPtrImm - Return a target constant of pointer type.
- inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
- }
-
- SDNode *emitBuildVector(SDValue build_vec) {
- MVT vecVT = build_vec.getValueType();
- SDNode *bvNode = build_vec.getNode();
- DebugLoc dl = bvNode->getDebugLoc();
-
- // Check to see if this vector can be represented as a CellSPU immediate
- // constant by invoking all of the instruction selection predicates:
- if (((vecVT == MVT::v8i16) &&
- (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
- ((vecVT == MVT::v4i32) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
- (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
- ((vecVT == MVT::v2i64) &&
- ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
- return Select(build_vec);
-
- // No, need to emit a constant pool spill:
- std::vector<Constant*> CV;
-
- for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
- CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
- }
-
- Constant *CP = ConstantVector::get(CV);
- SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue CGPoolOffset =
- SPU::LowerConstantPool(CPIdx, *CurDAG,
- SPUtli.getSPUTargetMachine());
- return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(), 0,
- false, Alignment));
- }
-
- /// Select - Convert the specified operand from a target-independent to a
- /// target-specific node if it hasn't already been changed.
- SDNode *Select(SDValue Op);
-
- //! Emit the instruction sequence for i64 shl
- SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
-
- //! Emit the instruction sequence for i64 srl
- SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
-
- //! Emit the instruction sequence for i64 sra
- SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
-
- //! Emit the necessary sequence for loading i64 constants:
- SDNode *SelectI64Constant(SDValue &Op, MVT OpVT);
-
- //! Returns true if the address N is an A-form (local store) address
- bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- //! D-form address predicate
- bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// Alternate D-form address using i7 offset predicate
- bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
- SDValue &Base);
-
- /// D-form address selection workhorse
- bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
- SDValue &Base, int minOffset, int maxOffset);
-
- //! Address predicate if N can be expressed as an indexed [r+r] operation.
- bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
- SDValue &Index);
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- if (!SelectDFormAddr(Op, Op, Op0, Op1)
- && !SelectAFormAddr(Op, Op, Op0, Op1))
- SelectXFormAddr(Op, Op, Op0, Op1);
- break;
- case 'o': // offsetable
- if (!SelectDFormAddr(Op, Op, Op0, Op1)
- && !SelectAFormAddr(Op, Op, Op0, Op1)) {
- Op0 = Op;
- Op1 = getSmallIPtrImm(0);
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ SPUTargetMachine &TM;
+ SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnFunction(Function &Fn) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnFunction(Fn);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
}
- break;
- case 'v': // not offsetable
+
+ SDNode *emitBuildVector(SDValue build_vec) {
+ MVT vecVT = build_vec.getValueType();
+ MVT eltVT = vecVT.getVectorElementType();
+ SDNode *bvNode = build_vec.getNode();
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
+ return Select(build_vec);
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < build_vec.getNumOperands(); ++i) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode > (build_vec.getOperand(i));
+ CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG,
+ SPUtli.getSPUTargetMachine());
+ return SelectCode(CurDAG->getLoad(build_vec.getValueType(), dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, Alignment));
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDValue Op);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDValue &Op, MVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1))
+ SelectXFormAddr(Op, Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op, Op, Op0, Op1)
+ && !SelectAFormAddr(Op, Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
#if 1
- assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
+ assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled.");
#else
- SelectAddrIdxOnly(Op, Op, Op0, Op1);
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
#endif
- break;
- }
+ break;
+ }
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
- return false;
- }
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
+ /// InstructionSelect - This callback is invoked by
+ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
+ virtual void InstructionSelect();
- virtual const char *getPassName() const {
- return "Cell SPU DAG->DAG Pattern Instruction Selection";
- }
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
- /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
- /// this target when scheduling the DAG.
- virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
- const TargetInstrInfo *II = TM.getInstrInfo();
- assert(II && "No InstrInfo?");
- return new SPUHazardRecognizer(*II);
- }
+ /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+ /// this target when scheduling the DAG.
+ virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() {
+ const TargetInstrInfo *II = TM.getInstrInfo();
+ assert(II && "No InstrInfo?");
+ return new SPUHazardRecognizer(*II);
+ }
- // Include the pieces autogenerated from the target description.
+ // Include the pieces autogenerated from the target description.
#include "SPUGenDAGISel.inc"
-};
-
+ };
}
/// InstructionSelect - This callback is invoked by
@@ -689,7 +719,7 @@
// Catch the i64 constants that end up here. Note: The backend doesn't
// attempt to legalize the constant (it's useless because DAGCombiner
// will insert 64-bit constants and we can't stop it).
- return SelectI64Constant(Op, OpVT);
+ return SelectI64Constant(Op, OpVT, Op.getDebugLoc());
} else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
&& OpVT == MVT::i64) {
SDValue Op0 = Op.getOperand(0);
@@ -747,21 +777,21 @@
zextShuffle));
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(SPU::getBorrowGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
SDValue(CGLoad, 0)));
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
- emitBuildVector(SPU::getCarryGenerateShufMask(*CurDAG, dl));
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl));
return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
Op.getOperand(0), Op.getOperand(1),
@@ -813,6 +843,54 @@
if (OpVT == MVT::i64) {
return SelectSRAi64(Op, OpVT);
}
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = Op.getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = Op.getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::ORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst, MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::ORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst));
+ }
+
+ return CurDAG->getTargetNode(Opc, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec);
+ return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT,
+ Op.getOperand(0), SDValue(signMask, 0));
+ }
} else if (Opc == SPUISD::LDRESULT) {
// Custom select instructions for LDRESULT
MVT VT = N->getValueType(0);
@@ -1087,13 +1165,17 @@
/*!
Do the necessary magic necessary to load a i64 constant
*/
-SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT) {
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT,
+ DebugLoc dl) {
ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
- // Currently there's no DL on the input, but won't hurt to pretend.
- DebugLoc dl = Op.getDebugLoc();
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT,
+ DebugLoc dl) {
MVT OpVecVT = MVT::getVectorVT(OpVT, 2);
SDValue i64vec =
- SPU::LowerSplat_v2i64(OpVecVT, *CurDAG, CN->getZExtValue(), dl);
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
// Here's where it gets interesting, because we have to parse out the
// subtree handed back in i64vec:
@@ -1143,8 +1225,11 @@
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
SDValue(shufMaskNode, 0)));
- return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(shufNode, 0));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT,
+ SDValue(emitBuildVector(i64vec), 0));
} else {
cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n";
abort();
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Mon Mar 16 20:15:45 2009
@@ -1,5 +1,5 @@
-//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
@@ -1353,7 +1353,7 @@
}
}
- return 0; // All UNDEF: use implicit def.; not Constant node
+ return 0;
}
/// get_vec_i18imm - Test if this vector is a vector filled with the same value
@@ -1480,131 +1480,30 @@
return SDValue();
}
-// If this is a vector of constants or undefs, get the bits. A bit in
-// UndefBits is set if the corresponding element of the vector is an
-// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
-// zero. Return true if this is not an array of constants, false if it is.
-//
-static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
- uint64_t UndefBits[2]) {
- // Start with zero'd results.
- VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
-
- unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- SDValue OpVal = BV->getOperand(i);
-
- unsigned PartNo = i >= e/2; // In the upper 128 bits?
- unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
-
- uint64_t EltBits = 0;
- if (OpVal.getOpcode() == ISD::UNDEF) {
- uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
- UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
- continue;
- } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
- EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
- } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
- const APFloat &apf = CN->getValueAPF();
- EltBits = (CN->getValueType(0) == MVT::f32
- ? FloatToBits(apf.convertToFloat())
- : DoubleToBits(apf.convertToDouble()));
- } else {
- // Nonconstant element.
- return true;
- }
-
- VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
- }
-
- //printf("%llx %llx %llx %llx\n",
- // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
- return false;
-}
-
-/// If this is a splat (repetition) of a value across the whole vector, return
-/// the smallest size that splats it. For example, "0x01010101010101..." is a
-/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
-/// SplatSize = 1 byte.
-static bool isConstantSplat(const uint64_t Bits128[2],
- const uint64_t Undef128[2],
- int MinSplatBits,
- uint64_t &SplatBits, uint64_t &SplatUndef,
- int &SplatSize) {
- // Don't let undefs prevent splats from matching. See if the top 64-bits are
- // the same as the lower 64-bits, ignoring undefs.
- uint64_t Bits64 = Bits128[0] | Bits128[1];
- uint64_t Undef64 = Undef128[0] & Undef128[1];
- uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
- uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
- uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
- uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
-
- if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
- if (MinSplatBits < 64) {
-
- // Check that the top 32-bits are the same as the lower 32-bits, ignoring
- // undefs.
- if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
- if (MinSplatBits < 32) {
-
- // If the top 16-bits are different than the lower 16-bits, ignoring
- // undefs, we have an i32 splat.
- if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
- if (MinSplatBits < 16) {
- // If the top 8-bits are different than the lower 8-bits, ignoring
- // undefs, we have an i16 splat.
- if ((Bits16 & (uint16_t(~Undef16) >> 8))
- == ((Bits16 >> 8) & ~Undef16)) {
- // Otherwise, we have an 8-bit splat.
- SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
- SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
- SplatSize = 1;
- return true;
- }
- } else {
- SplatBits = Bits16;
- SplatUndef = Undef16;
- SplatSize = 2;
- return true;
- }
- }
- } else {
- SplatBits = Bits32;
- SplatUndef = Undef32;
- SplatSize = 4;
- return true;
- }
- }
- } else {
- SplatBits = Bits128[0];
- SplatUndef = Undef128[0];
- SplatSize = 8;
- return true;
- }
- }
-
- return false; // Can't be a splat if two pieces don't match.
-}
-
//! Lower a BUILD_VECTOR instruction creatively:
SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
+ MVT EltVT = VT.getVectorElementType();
DebugLoc dl = Op.getDebugLoc();
- // If this is a vector of constants or undefs, get the bits. A bit in
- // UndefBits is set if the corresponding element of the vector is an
- // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
- // zero.
- uint64_t VectorBits[2];
- uint64_t UndefBits[2];
- uint64_t SplatBits, SplatUndef;
- int SplatSize;
- if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
- || !isConstantSplat(VectorBits, UndefBits,
- VT.getVectorElementType().getSizeInBits(),
- SplatBits, SplatUndef, SplatSize))
- return SDValue(); // Not a constant vector, not a splat.
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
switch (VT.getSimpleVT()) {
default:
@@ -1620,8 +1519,7 @@
// NOTE: pretend the constant is an integer. LLVM won't load FP constants
SDValue T = DAG.getConstant(Value32, MVT::i32);
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
- DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::v4i32, T, T, T, T));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T, T, T, T));
break;
}
case MVT::v2f64: {
@@ -1636,45 +1534,42 @@
}
case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
- unsigned short Value16 = SplatBits | (SplatBits << 8);
- SDValue Ops[8];
- for (int i = 0; i < 8; ++i)
- Ops[i] = DAG.getConstant(Value16, MVT::i16);
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, Ops, 8));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
}
case MVT::v8i16: {
- unsigned short Value16;
- if (SplatSize == 2)
- Value16 = (unsigned short) (SplatBits & 0xffff);
- else
- Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
- SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
- SDValue Ops[8];
- for (int i = 0; i < 8; ++i) Ops[i] = T;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops, 8);
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
}
case MVT::v4i32: {
- unsigned int Value = SplatBits;
- SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
case MVT::v2i32: {
- unsigned int Value = SplatBits;
- SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
}
case MVT::v2i64: {
- return SPU::LowerSplat_v2i64(VT, DAG, SplatBits, dl);
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
}
return SDValue();
}
+/*!
+ */
SDValue
-SPU::LowerSplat_v2i64(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
- DebugLoc dl) {
+SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
uint32_t upper = uint32_t(SplatVal >> 32);
uint32_t lower = uint32_t(SplatVal);
@@ -1685,10 +1580,6 @@
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
Val, Val, Val, Val));
} else {
- SDValue LO32;
- SDValue HI32;
- SmallVector<SDValue, 16> ShufBytes;
- SDValue Result;
bool upper_special, lower_special;
// NOTE: This code creates common-case shuffle masks that can be easily
@@ -1699,6 +1590,18 @@
upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ SplatValCN, SplatValCN);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
// Create lower vector if not a special pattern
if (!lower_special) {
SDValue LO32C = DAG.getConstant(lower, MVT::i32);
@@ -1721,13 +1624,6 @@
LO32 = HI32;
if (upper_special)
HI32 = LO32;
- if (lower_special && upper_special) {
- // Unhappy situation... both upper and lower are special, so punt with
- // a target constant:
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Zero, Zero,
- Zero, Zero);
- }
for (int i = 0; i < 4; ++i) {
uint64_t val = 0;
@@ -2022,9 +1918,9 @@
ShufMask[i] = DAG.getConstant(bits, MVT::i32);
}
- SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufMask[0],
- sizeof(ShufMask) / sizeof(ShufMask[0]));
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
@@ -2067,28 +1963,28 @@
/*NOTREACHED*/
case MVT::i8: {
SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
- factor, factor);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
break;
}
case MVT::i16: {
SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
- factor, factor);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
break;
}
case MVT::i32:
case MVT::f32: {
SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, factor, factor,
- factor, factor);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
break;
}
case MVT::i64:
case MVT::f64: {
SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
- replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
loFactor, hiFactor, loFactor, hiFactor);
break;
}
@@ -2164,71 +2060,65 @@
case ISD::ROTR:
case ISD::ROTL: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- TLI.getShiftAmountTy()));
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
SDValue ExpandArg =
DAG.getNode(ISD::OR, dl, MVT::i16, N0,
DAG.getNode(ISD::SHL, dl, MVT::i16,
N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
}
case ISD::SRL:
case ISD::SHL: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i32));
- N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::ZERO_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::SRA: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(ShiftVT)
- ? ISD::SIGN_EXTEND
- : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, dl, ShiftVT, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- ShiftVT));
+ MVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
}
case ISD::MUL: {
SDValue N1 = Op.getOperand(1);
- unsigned N1Opc;
- N0 = (N0.getOpcode() != ISD::Constant
- ? DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
- MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
- N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, dl, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
- MVT::i16));
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
break;
@@ -2238,36 +2128,6 @@
return SDValue();
}
-//! Generate the carry-generate shuffle mask.
-SDValue SPU::getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
-}
-
-//! Generate the borrow-generate shuffle mask
-SDValue SPU::getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
- SmallVector<SDValue, 16 > ShufBytes;
-
- // Create the shuffle mask for "rotating" the borrow up one register slot
- // once the borrow is generated.
- ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
- ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- &ShufBytes[0], ShufBytes.size());
-}
-
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
@@ -2291,26 +2151,24 @@
}
if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
- uint64_t VectorBits[2];
- uint64_t UndefBits[2];
- uint64_t SplatBits, SplatUndef;
- int SplatSize;
-
- if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
- && isConstantSplat(VectorBits, UndefBits,
- VT.getVectorElementType().getSizeInBits(),
- SplatBits, SplatUndef, SplatSize)) {
- SDValue tcVec[16];
- SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
- const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
- // Turn the BUILD_VECTOR into a set of target constants:
- for (size_t i = 0; i < tcVecSize; ++i)
- tcVec[i] = tc;
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- tcVec, tcVecSize));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
}
}
@@ -2452,7 +2310,7 @@
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
- return Op; // return unmolested, legalized op
+ return SDValue();
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
@@ -2478,7 +2336,7 @@
return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
}
- return Op; // return unmolested, legalized
+ return SDValue();
}
//! Lower ISD::SETCC
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h Mon Mar 16 20:15:45 2009
@@ -78,11 +78,9 @@
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
const SPUTargetMachine &TM);
- SDValue LowerSplat_v2i64(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat,
DebugLoc dl);
-
- SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
- SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl);
}
class SPUTargetMachine; // forward dec'l.
Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Mon Mar 16 20:15:45 2009
@@ -60,9 +60,6 @@
unsigned& SrcSR, unsigned& DstSR) const {
SrcSR = DstSR = 0; // No sub-registers.
- // Primarily, ORI and OR are generated by copyRegToReg. But, there are other
- // cases where we can safely say that what's being done is really a move
- // (see how PowerPC does this -- it's the model for this code too.)
switch (MI.getOpcode()) {
default:
break;
@@ -167,7 +164,7 @@
MI.getOperand(1).isReg() &&
"invalid SPU OR<type>_<vec> or LR instruction!");
if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
- sourceReg = MI.getOperand(0).getReg();
+ sourceReg = MI.getOperand(1).getReg();
destReg = MI.getOperand(0).getReg();
return true;
}
Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Mon Mar 16 20:15:45 2009
@@ -1258,10 +1258,9 @@
def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
[/* Intentionally does not match a pattern */]>;
- def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
[/* Intentionally does not match a pattern */]>;
- // Could use v4i32, but won't for clarity
def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* Intentionally does not match a pattern */]>;
@@ -1288,10 +1287,11 @@
RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
IntegerOp, pattern>;
-class ANDCVecInst<ValueType vectype>:
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
- (vnot (vectype VECREG:$rB))))]>;
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
class ANDCRegInst<RegisterClass rclass>:
ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
@@ -1309,6 +1309,9 @@
def r32: ANDCRegInst<R32C>;
def r16: ANDCRegInst<R16C>;
def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
}
defm ANDC : AndComplement;
@@ -1480,6 +1483,17 @@
def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
[/* no pattern */]>;
+ // OR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+
// scalar->vector promotion, prefslot2vec:
def v16i8_i8: ORPromoteScalar<R8C>;
def v8i16_i16: ORPromoteScalar<R16C>;
@@ -1783,18 +1797,6 @@
def r32: XORRegInst<R32C>;
def r16: XORRegInst<R16C>;
def r8: XORRegInst<R8C>;
-
- // Special forms for floating point instructions.
- // fneg and fabs require bitwise logical ops to manipulate the sign bit.
-
- def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
- [/* no pattern */]>;
-
- def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, VECREG:$rB),
- [/* no pattern */]>;
-
- def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- [/* no pattern, see fneg{32,64} */]>;
}
defm XOR : BitwiseExclusiveOr;
@@ -4239,33 +4241,36 @@
(fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
(v2f64 VECREG:$rC)))]>;
-// FNMS: - (a * b - c)
+// DFNMS: - (a * b - c)
// - (a * b) + c => c - (a * b)
-def FNMSf64 :
- RRForm<0b01111010110, (outs R64FP:$rT),
- (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
- "dfnms\t$rT, $rA, $rB", DPrecFP,
- [(set R64FP:$rT, (fsub R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
RegConstraint<"$rC = $rT">,
NoEncode<"$rC">;
-def : Pat<(fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)),
- (FNMSf64 R64FP:$rA, R64FP:$rB, R64FP:$rC)>;
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
-def FNMSv2f64 :
- RRForm<0b01111010110, (outs VECREG:$rT),
- (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "dfnms\t$rT, $rA, $rB", DPrecFP,
- [(set (v2f64 VECREG:$rT),
- (fsub (v2f64 VECREG:$rC),
- (fmul (v2f64 VECREG:$rA),
- (v2f64 VECREG:$rB))))]>,
- RegConstraint<"$rC = $rT">,
- NoEncode<"$rC">;
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
-def : Pat<(fneg (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
- (v2f64 VECREG:$rC))),
- (FNMSv2f64 VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
// - (a * b + c)
// - (a * b) - c
@@ -4293,35 +4298,21 @@
//===----------------------------------------------------------------------==//
def : Pat<(fneg (v4f32 VECREG:$rA)),
- (XORfnegvec (v4f32 VECREG:$rA),
- (v4f32 (ILHUv4i32 0x8000)))>;
+ (ORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
def : Pat<(fneg R32FP:$rA),
- (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
-
-def : Pat<(fneg (v2f64 VECREG:$rA)),
- (XORfnegvec (v2f64 VECREG:$rA),
- (v2f64 (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80)))>;
-
-def : Pat<(fneg R64FP:$rA),
- (XORfneg64 R64FP:$rA,
- (ANDBIv16i8 (FSMBIv16i8 0x8080), 0x80))>;
+ (ORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
// Floating point absolute value
+// Note: f64 fabs is custom-selected.
def : Pat<(fabs R32FP:$rA),
(ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
def : Pat<(fabs (v4f32 VECREG:$rA)),
(ANDfabsvec (v4f32 VECREG:$rA),
- (v4f32 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
-
-def : Pat<(fabs R64FP:$rA),
- (ANDfabs64 R64FP:$rA, (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f))>;
-
-def : Pat<(fabs (v2f64 VECREG:$rA)),
- (ANDfabsvec (v2f64 VECREG:$rA),
- (v2f64 (ANDBIv16i8 (FSMBIv16i8 0xffff), 0x7f)))>;
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
//===----------------------------------------------------------------------===//
// Hint for branch instructions:
Modified: llvm/trunk/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/2009-01-01-BrCond.ll?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/2009-01-01-BrCond.ll (original)
+++ llvm/trunk/test/CodeGen/CellSPU/2009-01-01-BrCond.ll Mon Mar 16 20:15:45 2009
@@ -8,11 +8,11 @@
define double @__floatunsidf(i32 %arg_a) nounwind {
entry:
- %in = alloca %struct.fp_number_type, align 8 ; <%struct.fp_number_type*> [#uses=5]
- %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1 ; <i32*> [#uses=1]
+ %in = alloca %struct.fp_number_type, align 16
+ %0 = getelementptr %struct.fp_number_type* %in, i32 0, i32 1
store i32 0, i32* %0, align 4
- %1 = icmp eq i32 %arg_a, 0 ; <i1> [#uses=1]
- %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0 ; <i32*> [#uses=2]
+ %1 = icmp eq i32 %arg_a, 0
+ %2 = getelementptr %struct.fp_number_type* %in, i32 0, i32 0
br i1 %1, label %bb, label %bb1
bb: ; preds = %entry
@@ -26,6 +26,6 @@
ret double 1.0
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+; declare i32 @llvm.ctlz.i32(i32) nounwind readnone
declare double @__pack_d(%struct.fp_number_type*)
Modified: llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll?rev=67067&r1=67066&r2=67067&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll (original)
+++ llvm/trunk/test/CodeGen/CellSPU/fneg-fabs.ll Mon Mar 16 20:15:45 2009
@@ -1,9 +1,7 @@
; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
-; RUN: grep fsmbi %t1.s | count 3
; RUN: grep 32768 %t1.s | count 2
-; RUN: grep xor %t1.s | count 4
-; RUN: grep and %t1.s | count 5
-; RUN: grep andbi %t1.s | count 3
+; RUN: grep or %t1.s | count 4
+; RUN: grep and %t1.s | count 2
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
More information about the llvm-commits mailing list