[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp PPCISelLowering.cpp PPCISelLowering.h PPCInstrInfo.td PPCRegisterInfo.cpp
Chris Lattner
lattner at cs.uiuc.edu
Tue Mar 21 21:30:45 PST 2006
Changes in directory llvm/lib/Target/PowerPC:
PPCHazardRecognizers.cpp updated: 1.11 -> 1.12
PPCISelLowering.cpp updated: 1.107 -> 1.108
PPCISelLowering.h updated: 1.30 -> 1.31
PPCInstrInfo.td updated: 1.202 -> 1.203
PPCRegisterInfo.cpp updated: 1.48 -> 1.49
---
Log message:
When possible, custom lower 32-bit SINT_TO_FP to this:
_foo2:
extsw r2, r3
std r2, -8(r1)
lfd f0, -8(r1)
fcfid f0, f0
frsp f1, f0
blr
instead of this:
_foo2:
lis r2, ha16(LCPI2_0)
lis r4, 17200
xoris r3, r3, 32768
stw r3, -4(r1)
stw r4, -8(r1)
lfs f0, lo16(LCPI2_0)(r2)
lfd f1, -8(r1)
fsub f0, f1, f0
frsp f1, f0
blr
This speeds up Misc/pi from 2.44s to 2.09s with LLC and from 3.01s to 2.18s
with llcbeta (speedups of 16.7% and 38.1%, respectively).
---
Diffs of the changes: (+92 -32)
PPCHazardRecognizers.cpp | 3 +
PPCISelLowering.cpp | 86 ++++++++++++++++++++++++++++++++---------------
PPCISelLowering.h | 7 +++
PPCInstrInfo.td | 26 ++++++++++++--
PPCRegisterInfo.cpp | 2 -
5 files changed, 92 insertions(+), 32 deletions(-)
Index: llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
diff -u llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.11 llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.12
--- llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp:1.11 Sun Mar 12 23:23:59 2006
+++ llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp Tue Mar 21 23:30:33 2006
@@ -245,8 +245,9 @@
case PPC::STFIWX:
ThisStoreSize = 4;
break;
+ case PPC::STD_32:
+ case PPC::STDX_32:
case PPC::STD:
- case PPC::STDU:
case PPC::STFD:
case PPC::STFDX:
case PPC::STDX:
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.107 llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.108
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp:1.107 Tue Mar 21 14:51:05 2006
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp Tue Mar 21 23:30:33 2006
@@ -140,6 +140,7 @@
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
// To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
} else {
@@ -222,6 +223,8 @@
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
+ case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
}
@@ -302,15 +305,41 @@
Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
return Bits;
}
- case ISD::SINT_TO_FP: {
- assert(MVT::i64 == Op.getOperand(0).getValueType() &&
- "Unhandled SINT_TO_FP type in custom expander!");
- SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
- SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
- if (MVT::f32 == Op.getValueType())
- FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
- return FP;
- }
+ case ISD::SINT_TO_FP:
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+ } else {
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled SINT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
+ // then lfd it and fcfid it.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
+
+ SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
+ DAG.getEntryNode(), Ext64, FIdx,
+ DAG.getSrcValue(NULL));
+ // Load the value as a double.
+ SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
+
+ // FCFID it and return it.
+ SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
+ if (Op.getValueType() == MVT::f32)
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ return FP;
+ }
+
case ISD::SELECT_CC: {
// Turn FP only select_cc's into fsel instructions.
if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
@@ -1106,27 +1135,30 @@
default: break;
case ISD::SINT_TO_FP:
if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
- // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
- // We allow the src/dst to be either f32/f64, but force the intermediate
- // type to be i64.
- if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT &&
- N->getOperand(0).getValueType() == MVT::i64) {
-
- SDOperand Val = N->getOperand(0).getOperand(0);
- if (Val.getValueType() == MVT::f32) {
- Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64) {
+ SDOperand Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
+ DCI.AddToWorklist(Val.Val);
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
DCI.AddToWorklist(Val.Val);
- }
-
- Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
- DCI.AddToWorklist(Val.Val);
- Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
- DCI.AddToWorklist(Val.Val);
- if (N->getValueType(0) == MVT::f32) {
- Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
+ Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
DCI.AddToWorklist(Val.Val);
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
+ DCI.AddToWorklist(Val.Val);
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
}
- return Val;
}
}
break;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
diff -u llvm/lib/Target/PowerPC/PPCISelLowering.h:1.30 llvm/lib/Target/PowerPC/PPCISelLowering.h:1.31
--- llvm/lib/Target/PowerPC/PPCISelLowering.h:1.30 Mon Mar 20 00:33:01 2006
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h Tue Mar 21 23:30:33 2006
@@ -75,7 +75,14 @@
/// shift amounts. These nodes are generated by the multi-precision shift
/// code.
SRL, SRA, SHL,
+
+ /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
+ /// registers.
+ EXTSW_32,
+ /// STD_32 - This is the STD instruction for use with "32-bit" registers.
+ STD_32,
+
/// CALL - A function call.
CALL,
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
diff -u llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.202 llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.203
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.202 Tue Mar 21 19:44:36 2006
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td Tue Mar 21 23:30:33 2006
@@ -58,6 +58,9 @@
def PPCsra : SDNode<"PPCISD::SRA" , SDT_PPCShiftOp>;
def PPCshl : SDNode<"PPCISD::SHL" , SDT_PPCShiftOp>;
+def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>;
+def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, [SDNPHasChain]>;
+
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeq,[SDNPHasChain]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeq,[SDNPHasChain]>;
@@ -194,11 +197,17 @@
let NumMIOperands = 2;
let MIOperandInfo = (ops GPRC, GPRC);
}
+def memrix : Operand<i32> { // memri where the imm is shifted 2 bits.
+ let PrintMethod = "printMemRegImmShifted";
+ let NumMIOperands = 2;
+ let MIOperandInfo = (ops i32imm, GPRC);
+}
// Define PowerPC specific addressing mode.
def iaddr : ComplexPattern<i32, 2, "SelectAddrImm", []>;
def xaddr : ComplexPattern<i32, 2, "SelectAddrIdx", []>;
def xoaddr : ComplexPattern<i32, 2, "SelectAddrIdxOnly",[]>;
+def ixaddr : ComplexPattern<i32, 2, "SelectAddrImmShift", []>; // "std"
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -428,9 +437,15 @@
def STD : DSForm_2<62, 0, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA),
"std $rT, $DS($rA)", LdStSTD,
[]>, isPPC64;
-def STDU : DSForm_2<62, 1, (ops GPRC:$rT, s16immX4:$DS, GPRC:$rA),
- "stdu $rT, $DS($rA)", LdStSTD,
- []>, isPPC64;
+
+// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register.
+def STD_32 : DSForm_2<62, 0, (ops GPRC:$rT, memrix:$dst),
+ "std $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64;
+def STDX_32 : XForm_8<31, 149, (ops GPRC:$rT, memrr:$dst),
+ "stdx $rT, $dst", LdStSTD,
+ [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
}
// X-Form instructions. Most instructions that perform an operation on a
@@ -586,6 +601,11 @@
def EXTSW : XForm_11<31, 986, (ops G8RC:$rA, G8RC:$rS),
"extsw $rA, $rS", IntGeneral,
[(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64;
+/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers.
+def EXTSW_32 : XForm_11<31, 986, (ops GPRC:$rA, GPRC:$rS),
+ "extsw $rA, $rS", IntGeneral,
+ [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64;
+
def CMP : XForm_16<31, 0, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB),
"cmp $crD, $long, $rA, $rB", IntCompare>;
def CMPL : XForm_16<31, 32, (ops CRRC:$crD, i1imm:$long, GPRC:$rA, GPRC:$rB),
Index: llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
diff -u llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.48 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.49
--- llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp:1.48 Thu Mar 16 17:52:08 2006
+++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp Tue Mar 21 23:30:33 2006
@@ -276,7 +276,7 @@
case PPC::LWA:
case PPC::LD:
case PPC::STD:
- case PPC::STDU:
+ case PPC::STD_32:
assert((Offset & 3) == 0 && "Invalid frame offset!");
Offset >>= 2; // The actual encoded value has the low two bits zero.
break;
More information about the llvm-commits
mailing list