[llvm-commits] [llvm] r60526 - in /llvm/trunk/lib/Target/CellSPU: SPUISelDAGToDAG.cpp SPUISelLowering.cpp SPUISelLowering.h SPUInstrInfo.td SPUNodes.td
Scott Michel
scottm at aero.org
Wed Dec 3 19:02:42 PST 2008
Author: pingbak
Date: Wed Dec 3 21:02:42 2008
New Revision: 60526
URL: http://llvm.org/viewvc/llvm-project?rev=60526&view=rev
Log:
CellSPU:
- First patch from Nehal Desai, a new contributor at Aerospace. Nehal's patch
fixes sign/zero/any-extending loads for integers and floating point. Example
code, compiled without debugging or optimization, where he first noticed the bug:
int main(void) {
float a = 99.0;
printf("%d\n", a);
return 0;
}
Verified that this code actually works on a Cell SPU.
Changes by Scott Michel:
- Fix bug in the value type list constructed by SPUISD::LDRESULT to include
both the load result's value type and the chain, not just the chain alone.
- Simplify LowerLOAD and remove extraneous and unnecessary chains.
- Remove unused SPUISD pseudo instructions.
Modified:
llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
llvm/trunk/lib/Target/CellSPU/SPUNodes.td
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=60526&r1=60525&r2=60526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Wed Dec 3 21:02:42 2008
@@ -676,7 +676,7 @@
Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Zero, Chain);
} else {
- Result = CurDAG->getTargetNode(Opc, MVT::Other, Arg, Arg, Chain);
+ Result = CurDAG->getTargetNode(Opc, VT, MVT::Other, Arg, Arg, Chain);
}
Chain = SDValue(Result, 1);
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=60526&r1=60525&r2=60526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Wed Dec 3 21:02:42 2008
@@ -436,12 +436,6 @@
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
- = "SPUISD::VEC2PREFSLOT_CHAINED";
- node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
- node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
@@ -458,8 +452,6 @@
node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
"SPUISD::ROTQUAD_RZ_BITS";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
- "SPUISD::ROTBYTES_LEFT_CHAINED";
node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
"SPUISD::ROTBYTES_LEFT_BITS";
node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
@@ -597,13 +589,24 @@
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
within a 16-byte block, we have to rotate to extract the requested element.
- */
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32, ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
- MVT VT = LN->getMemoryVT();
- MVT OpVT = Op.getNode()->getValueType(0);
+ MVT InVT = LN->getMemoryVT();
+ MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
SDValue Ops[8];
@@ -613,7 +616,8 @@
int offset, rotamt;
bool was16aligned;
SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
+ AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
+ was16aligned);
if (result.getNode() == 0)
return result;
@@ -625,57 +629,40 @@
if (rotamt != 0 || !was16aligned) {
SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
- Ops[0] = the_chain;
- Ops[1] = result;
+ Ops[0] = result;
if (was16aligned) {
- Ops[2] = DAG.getConstant(rotamt, MVT::i16);
+ Ops[1] = DAG.getConstant(rotamt, MVT::i16);
} else {
MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
DAG.getConstant(rotamt, PtrVT));
}
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
- the_chain = result.getValue(1);
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
}
- if (VT == OpVT || ExtType == ISD::EXTLOAD) {
- SDVTList scalarvts;
- MVT vecVT = MVT::v16i8;
-
- // Convert the loaded v16i8 vector to the appropriate vector type
- // specified by the operand:
- if (OpVT == VT) {
- if (VT != MVT::i1)
- vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
- } else
- vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
+ DAG.getNode(ISD::BIT_CONVERT, vecVT, result));
- Ops[0] = the_chain;
- Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
- scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
- result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
- the_chain = result.getValue(1);
- } else {
- // Handle the sign and zero-extending loads for i1 and i8:
- unsigned NewOpC;
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
- if (ExtType == ISD::SEXTLOAD) {
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_SEXT
- : SPUISD::EXTRACT_I8_SEXT);
- } else {
- assert(ExtType == ISD::ZEXTLOAD);
- NewOpC = (OpVT == MVT::i1
- ? SPUISD::EXTRACT_I1_ZEXT
- : SPUISD::EXTRACT_I8_ZEXT);
- }
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
- result = DAG.getNode(NewOpC, OpVT, result);
+ result = DAG.getNode(NewOpc, OutVT, result);
}
- SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
SDValue retops[2] = {
result,
the_chain
@@ -3034,10 +3021,16 @@
SDValue combinedConst =
DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
+#if defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add " << CN0->getZExtValue() << ", "
+ << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
+ << "With: (SPUindirect <arg>, "
+ << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
+ }
+#endif
+
return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
Op0.getOperand(0), combinedConst);
}
@@ -3071,11 +3064,14 @@
// (any_extend (SPUextract_elt0 <arg>)) ->
// (SPUextract_elt0 <arg>)
// Types must match, however...
- DEBUG(cerr << "Replace: ");
- DEBUG(N->dump(&DAG));
- DEBUG(cerr << "\nWith: ");
- DEBUG(Op0.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
+#if defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\nReplace: ";
+ N->dump(&DAG);
+ cerr << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ cerr << "\n";
+#endif
return Op0;
}
@@ -3243,8 +3239,7 @@
}
case SPUISD::LDRESULT:
- case SPUISD::VEC2PREFSLOT:
- case SPUISD::VEC2PREFSLOT_CHAINED: {
+ case SPUISD::VEC2PREFSLOT: {
MVT OpVT = Op.getValueType();
unsigned OpVTBits = OpVT.getSizeInBits();
uint64_t InMask = OpVT.getIntegerVTBitMask();
@@ -3254,10 +3249,6 @@
}
#if 0
- case EXTRACT_I1_ZEXT:
- case EXTRACT_I1_SEXT:
- case EXTRACT_I8_ZEXT:
- case EXTRACT_I8_SEXT:
case MPY:
case MPYU:
case MPYH:
@@ -3272,7 +3263,6 @@
case SPUISD::ROTQUAD_RZ_BYTES:
case SPUISD::ROTQUAD_RZ_BITS:
case SPUISD::ROTBYTES_LEFT:
- case SPUISD::ROTBYTES_LEFT_CHAINED:
case SPUISD::SELECT_MASK:
case SPUISD::SELB:
case SPUISD::FPInterp:
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h?rev=60526&r1=60525&r2=60526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.h Wed Dec 3 21:02:42 2008
@@ -41,11 +41,6 @@
CNTB, ///< Count leading ones in bytes
PROMOTE_SCALAR, ///< Promote scalar->vector
VEC2PREFSLOT, ///< Extract element 0
- VEC2PREFSLOT_CHAINED, ///< Extract element 0, with chain
- EXTRACT_I1_ZEXT, ///< Extract element 0 as i1, zero extend
- EXTRACT_I1_SEXT, ///< Extract element 0 as i1, sign extend
- EXTRACT_I8_ZEXT, ///< Extract element 0 as i8, zero extend
- EXTRACT_I8_SEXT, ///< Extract element 0 as i8, sign extend
MPY, ///< 16-bit Multiply (low parts of a 32-bit)
MPYU, ///< Multiply Unsigned
MPYH, ///< Multiply High
@@ -60,7 +55,6 @@
ROTQUAD_RZ_BYTES, ///< Rotate quad right, by bytes, zero fill
ROTQUAD_RZ_BITS, ///< Rotate quad right, by bits, zero fill
ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
- ROTBYTES_LEFT_CHAINED, ///< Rotate bytes (loads -> ROTQBYI), with chain
ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
SELB, ///< Select bits -> (b & mask) | (a & ~mask)
Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=60526&r1=60525&r2=60526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Wed Dec 3 21:02:42 2008
@@ -1288,39 +1288,21 @@
def : Pat<(SPUvec2prefslot (v16i8 VECREG:$rA)),
(ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v16i8 VECREG:$rA)),
- (ORi8_v16i8 VECREG:$rA, VECREG:$rA)>;
-
def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)),
(ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v8i16 VECREG:$rA)),
- (ORi16_v8i16 VECREG:$rA, VECREG:$rA)>;
-
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
(ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v4i32 VECREG:$rA)),
- (ORi32_v4i32 VECREG:$rA, VECREG:$rA)>;
-
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
(ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v2i64 VECREG:$rA)),
- (ORi64_v2i64 VECREG:$rA, VECREG:$rA)>;
-
def : Pat<(SPUvec2prefslot (v4f32 VECREG:$rA)),
(ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v4f32 VECREG:$rA)),
- (ORf32_v4f32 VECREG:$rA, VECREG:$rA)>;
-
def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)),
(ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
-def : Pat<(SPUvec2prefslot_chained (v2f64 VECREG:$rA)),
- (ORf64_v2f64 VECREG:$rA, VECREG:$rA)>;
-
// ORC: Bitwise "or" with complement (c = a | ~b)
class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
@@ -2147,15 +2129,6 @@
defm ROTQBY: RotateQuadLeftByBytes;
-def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), R32C:$rB),
- (ROTQBYv16i8 VECREG:$rA, R32C:$rB)>;
-def : Pat<(SPUrotbytes_left_chained (v8i16 VECREG:$rA), R32C:$rB),
- (ROTQBYv8i16 VECREG:$rA, R32C:$rB)>;
-def : Pat<(SPUrotbytes_left_chained (v4i32 VECREG:$rA), R32C:$rB),
- (ROTQBYv4i32 VECREG:$rA, R32C:$rB)>;
-def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), R32C:$rB),
- (ROTQBYv2i64 VECREG:$rA, R32C:$rB)>;
-
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// Rotate quad by byte (count), immediate
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2179,15 +2152,6 @@
defm ROTQBYI: RotateQuadByBytesImm;
-def : Pat<(SPUrotbytes_left_chained (v16i8 VECREG:$rA), (i16 uimm7:$val)),
- (ROTQBYIv16i8 VECREG:$rA, uimm7:$val)>;
-def : Pat<(SPUrotbytes_left_chained (v8i16 VECREG:$rA), (i16 uimm7:$val)),
- (ROTQBYIv8i16 VECREG:$rA, uimm7:$val)>;
-def : Pat<(SPUrotbytes_left_chained (v4i32 VECREG:$rA), (i16 uimm7:$val)),
- (ROTQBYIv4i32 VECREG:$rA, uimm7:$val)>;
-def : Pat<(SPUrotbytes_left_chained (v2i64 VECREG:$rA), (i16 uimm7:$val)),
- (ROTQBYIv2i64 VECREG:$rA, uimm7:$val)>;
-
// See ROTQBY note above.
class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
RI7Form<0b00110011100, OOL, IOL,
@@ -3972,10 +3936,6 @@
// Zero/Any/Sign extensions
//===----------------------------------------------------------------------===//
-// zext 1->32: Zero extend i1 to i32
-def : Pat<(SPUextract_i1_zext R32C:$rSrc),
- (ANDIr32 R32C:$rSrc, 0x1)>;
-
// sext 8->32: Sign extend bytes to words
def : Pat<(sext_inreg R32C:$rSrc, i8),
(XSHWr32 (XSBHr32 R32C:$rSrc))>;
@@ -3983,19 +3943,10 @@
def : Pat<(i32 (sext R8C:$rSrc)),
(XSHWr16 (XSBHr8 R8C:$rSrc))>;
-def : Pat<(SPUextract_i8_sext VECREG:$rSrc),
- (XSHWr32 (XSBHr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc),
- (v4i32 VECREG:$rSrc))))>;
-
// zext 8->16: Zero extend bytes to halfwords
def : Pat<(i16 (zext R8C:$rSrc)),
(ANDHIi8i16 R8C:$rSrc, 0xff)>;
-// zext 8->32 from preferred slot in load/store
-def : Pat<(SPUextract_i8_zext VECREG:$rSrc),
- (ANDIr32 (ORi32_v4i32 (v4i32 VECREG:$rSrc), (v4i32 VECREG:$rSrc)),
- 0xff)>;
-
// zext 8->32: Zero extend bytes to words
def : Pat<(i32 (zext R8C:$rSrc)),
(ANDIi8i32 R8C:$rSrc, 0xff)>;
Modified: llvm/trunk/lib/Target/CellSPU/SPUNodes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUNodes.td?rev=60526&r1=60525&r2=60526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUNodes.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUNodes.td Wed Dec 3 21:02:42 2008
@@ -125,11 +125,6 @@
def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
SPUvecshift_type, []>;
-// Same as above, but the node also has a chain associated (used in loads and
-// stores)
-def SPUrotbytes_left_chained : SDNode<"SPUISD::ROTBYTES_LEFT_CHAINED",
- SPUvecshift_type, [SDNPHasChain]>;
-
// Vector rotate left by bytes, but the count is given in bits and the SPU
// internally converts it to bytes (saves an instruction to mask off lower
// three bits)
@@ -153,13 +148,6 @@
def SPU_vec_demote : SDTypeProfile<1, 1, []>;
def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
-def SPU_vec_demote_chained : SDTypeProfile<1, 2, []>;
-def SPUvec2prefslot_chained: SDNode<"SPUISD::VEC2PREFSLOT_CHAINED",
- SPU_vec_demote_chained, [SDNPHasChain]>;
-def SPUextract_i1_sext: SDNode<"SPUISD::EXTRACT_I1_SEXT", SPU_vec_demote, []>;
-def SPUextract_i1_zext: SDNode<"SPUISD::EXTRACT_I1_ZEXT", SPU_vec_demote, []>;
-def SPUextract_i8_sext: SDNode<"SPUISD::EXTRACT_I8_SEXT", SPU_vec_demote, []>;
-def SPUextract_i8_zext: SDNode<"SPUISD::EXTRACT_I8_ZEXT", SPU_vec_demote, []>;
// Address high and low components, used for [r+r] type addressing
def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
More information about the llvm-commits
mailing list