[llvm-commits] [llvm] r59790 - in /llvm/trunk: lib/Target/CellSPU/SPUISelDAGToDAG.cpp lib/Target/CellSPU/SPUISelLowering.cpp lib/Target/CellSPU/SPUInstrInfo.cpp lib/Target/CellSPU/SPUInstrInfo.td test/CodeGen/CellSPU/loads.ll test/CodeGen/CellSPU/stores.ll
Scott Michel
scottm at aero.org
Thu Nov 20 18:56:16 PST 2008
Author: pingbak
Date: Thu Nov 20 20:56:16 2008
New Revision: 59790
URL: http://llvm.org/viewvc/llvm-project?rev=59790&view=rev
Log:
CellSPU:
(a) Fix bugs 3052, 3057
(b) Incorporate Duncan's suggestions re: i1 promotion
(c) Indentation updates.
Added:
llvm/trunk/test/CodeGen/CellSPU/loads.ll
llvm/trunk/test/CodeGen/CellSPU/stores.ll
Modified:
llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp?rev=59790&r1=59789&r2=59790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelDAGToDAG.cpp Thu Nov 20 20:56:16 2008
@@ -430,8 +430,8 @@
SPUDAGToDAGISel::SelectDFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index) {
return DFormAddressPredicate(Op, N, Base, Index,
- SPUFrameInfo::minFrameOffset(),
- SPUFrameInfo::maxFrameOffset());
+ SPUFrameInfo::minFrameOffset(),
+ SPUFrameInfo::maxFrameOffset());
}
bool
@@ -544,7 +544,35 @@
Base = CurDAG->getTargetConstant(0, N.getValueType());
Index = N;
return true;
+ } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+ unsigned OpOpc = Op.getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Addr, Offs;
+
+ // Get the register from CopyFromReg
+ if (Opc == ISD::CopyFromReg)
+ Addr = N.getOperand(1);
+ else
+ Addr = N; // Register
+
+ if (OpOpc == ISD::STORE)
+ Offs = Op.getOperand(3);
+ else
+ Offs = Op.getOperand(2); // LOAD
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = Addr;
+ return true;
+ }
+ }
}
+
return false;
}
@@ -554,21 +582,27 @@
\arg Base The base pointer operand
\arg Index The offset/index operand
- If the address \a N can be expressed as a [r + s10imm] address, returns false.
- Otherwise, creates two operands, Base and Index that will become the [r+r]
- address.
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
*/
bool
SPUDAGToDAGISel::SelectXFormAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Index) {
- if (SelectAFormAddr(Op, N, Base, Index)
- || SelectDFormAddr(Op, N, Base, Index))
- return false;
-
- // All else fails, punt and use an X-form address:
- Base = N.getOperand(0);
- Index = N.getOperand(1);
- return true;
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+ // default form of a X-form address is r(r) in operands 0 and 1:
+ SDValue Op0 = N.getOperand(0);
+ SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == ISD::Register && Op1.getOpcode() == ISD::Register) {
+ Base = Op0;
+ Index = Op1;
+ return true;
+ }
+ }
+
+ return false;
}
//! Convert the operand from a target-independent to a target-specific node
Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=59790&r1=59789&r2=59790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Thu Nov 20 20:56:16 2008
@@ -165,8 +165,7 @@
setOperationAction(ISD::STORE, VT, Custom);
}
- // Custom lower BRCOND for i1, i8 to "promote" the result to
- // i32 and i16, respectively.
+ // Custom lower BRCOND for i8 to "promote" the result to i16
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
@@ -215,7 +214,8 @@
setOperationAction(ISD::SHL, MVT::i8, Custom);
setOperationAction(ISD::SRL, MVT::i8, Custom);
setOperationAction(ISD::SRA, MVT::i8, Custom);
- // And SPU needs custom lowering for shift left/right for i64
+
+ // SPU needs custom lowering for shift left/right for i64
setOperationAction(ISD::SHL, MVT::i64, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
@@ -223,7 +223,13 @@
// Custom lower i8, i32 and i64 multiplications
setOperationAction(ISD::MUL, MVT::i8, Custom);
setOperationAction(ISD::MUL, MVT::i32, Custom);
- setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
+
+ // SMUL_LOHI, UMUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
@@ -247,13 +253,11 @@
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
- setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
setOperationAction(ISD::SETCC, MVT::i32, Legal);
@@ -299,7 +303,7 @@
// We want to legalize GlobalAddress and ConstantPool nodes into the
// appropriate instructions to materialize the address.
- for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
@@ -699,8 +703,7 @@
int chunk_offset, slot_offset;
bool was16aligned;
- // The vector type we really want to load from the 16-byte chunk, except
- // in the case of MVT::i1, which has to be v16i8.
+ // The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
@@ -908,7 +911,7 @@
return SDValue();
}
-//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
+//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
@@ -916,8 +919,8 @@
MVT CondVT = Cond.getValueType();
MVT CondNVT;
- if (CondVT == MVT::i1 || CondVT == MVT::i8) {
- CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
+ if (CondVT == MVT::i8) {
+ CondNVT = MVT::i16;
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
Op.getOperand(0),
DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
@@ -957,37 +960,37 @@
switch (ObjectVT.getSimpleVT()) {
default: {
- cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
- << ObjectVT.getMVTString()
- << "\n";
- abort();
+ cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
+ << ObjectVT.getMVTString()
+ << "\n";
+ abort();
}
case MVT::i8:
- ArgRegClass = &SPU::R8CRegClass;
- break;
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
case MVT::i16:
- ArgRegClass = &SPU::R16CRegClass;
- break;
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
case MVT::i32:
- ArgRegClass = &SPU::R32CRegClass;
- break;
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
case MVT::i64:
- ArgRegClass = &SPU::R64CRegClass;
- break;
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
case MVT::f32:
- ArgRegClass = &SPU::R32FPRegClass;
- break;
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
case MVT::f64:
- ArgRegClass = &SPU::R64FPRegClass;
- break;
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
case MVT::v2f64:
case MVT::v4f32:
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- ArgRegClass = &SPU::VECREGRegClass;
- break;
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
@@ -2103,7 +2106,6 @@
// zero fill uppper part of preferred slot, don't care about the
// other slots:
unsigned int mask_val;
-
if (i <= prefslot_end) {
mask_val =
((i < prefslot_begin)
@@ -2884,7 +2886,7 @@
}
}
// Otherwise, return unchanged.
-#if 1
+#ifdef NDEBUG
if (Result.getNode()) {
DEBUG(cerr << "\nReplace.SPU: ");
DEBUG(N->dump(&DAG));
Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp?rev=59790&r1=59789&r2=59790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Thu Nov 20 20:56:16 2008
@@ -161,7 +161,7 @@
case SPU::STQDr64:
case SPU::STQDr32:
case SPU::STQDr16:
- // case SPU::STQDr8:
+ case SPU::STQDr8:
case SPU::STQXv16i8:
case SPU::STQXv8i16:
case SPU::STQXv4i32:
@@ -171,7 +171,7 @@
case SPU::STQXr64:
case SPU::STQXr32:
case SPU::STQXr16:
- // case SPU::STQXr8:
+ case SPU::STQXr8:
if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
MI->getOperand(2).isFI()) {
FrameIndex = MI->getOperand(2).getIndex();
Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=59790&r1=59789&r2=59790&view=diff
==============================================================================
--- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Thu Nov 20 20:56:16 2008
@@ -3494,26 +3494,62 @@
"fi\t$rT, $rA, $rB", SPrecFP,
[(set R32FP:$rT, (SPUinterpolate R32FP:$rA, R32FP:$rB))]>;
-// Floating Compare Equal
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
+//--------------------------------------------------------------------------
+
def FCEQf32 :
RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
"fceq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setoeq R32FP:$rA, R32FP:$rB))]>;
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
def FCMEQf32 :
RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
"fcmeq\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
def FCGTf32 :
RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
"fcgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setogt R32FP:$rA, R32FP:$rB))]>;
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setugt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
def FCMGTf32 :
RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
"fcmgt\t$rT, $rA, $rB", SPrecFP,
- [(set R32C:$rT, (setogt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setugt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
// FP Status and Control Register Write
// Why isn't rT a don't care in the ISA?
Added: llvm/trunk/test/CodeGen/CellSPU/loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/loads.ll?rev=59790&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/loads.ll (added)
+++ llvm/trunk/test/CodeGen/CellSPU/loads.ll Thu Nov 20 20:56:16 2008
@@ -0,0 +1,20 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {lqd.*0(\$3)} %t1.s | count 1
+; RUN: grep {lqd.*16(\$3)} %t1.s | count 1
+
+; ModuleID = 'loads.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define <4 x float> @load_v4f32_1(<4 x float>* %a) nounwind readonly {
+entry:
+ %tmp1 = load <4 x float>* %a
+ ret <4 x float> %tmp1
+}
+
+define <4 x float> @load_v4f32_2(<4 x float>* %a) nounwind readonly {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
+ %tmp1 = load <4 x float>* %arrayidx ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp1
+}
Added: llvm/trunk/test/CodeGen/CellSPU/stores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/stores.ll?rev=59790&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/CellSPU/stores.ll (added)
+++ llvm/trunk/test/CodeGen/CellSPU/stores.ll Thu Nov 20 20:56:16 2008
@@ -0,0 +1,22 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {stqd.*0(\$3)} %t1.s | count 1
+; RUN: grep {stqd.*16(\$3)} %t1.s | count 1
+; RUN: grep 16256 %t1.s | count 1
+; RUN: grep 16384 %t1.s | count 1
+
+; ModuleID = 'stores.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define void @store_v4f32_1(<4 x float>* %a) nounwind {
+entry:
+ store <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x float>* %a
+ ret void
+}
+
+define void @store_v4f32_2(<4 x float>* %a) nounwind {
+entry:
+ %arrayidx = getelementptr <4 x float>* %a, i32 1
+ store <4 x float> < float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00 >, <4 x float>* %arrayidx
+ ret void
+}
More information about the llvm-commits
mailing list