[llvm-commits] CVS: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp PPCInstrInfo.td
Nate Begeman
natebegeman at mac.com
Wed Dec 14 14:54:45 PST 2005
Changes in directory llvm/lib/Target/PowerPC:
PPCISelDAGToDAG.cpp updated: 1.140 -> 1.141
PPCInstrInfo.td updated: 1.158 -> 1.159
---
Log message:
Use the new predicate support that Evan Cheng added to remove some code
from the DAGToDAG cpp file. This adds pattern support for vector and
scalar fma, which passes test/Regression/CodeGen/PowerPC/fma.ll, and
does the right thing in the presence of -disable-excess-fp-precision.
Allows us to match:
void %foo(<4 x float> * %a) {
entry:
%tmp1 = load <4 x float> * %a;
%tmp2 = mul <4 x float> %tmp1, %tmp1
%tmp3 = add <4 x float> %tmp2, %tmp1
store <4 x float> %tmp3, <4 x float> *%a
ret void
}
As:
_foo:
li r2, 0
lvx v0, r2, r3
vmaddfp v0, v0, v0, v0
stvx v0, r2, r3
blr
Or, with llc -disable-excess-fp-precision,
_foo:
li r2, 0
lvx v0, r2, r3
vxor v1, v1, v1
vmaddfp v1, v0, v0, v1
vaddfp v0, v1, v0
stvx v0, r2, r3
blr
---
Diffs of the changes: (+24 -57)
PPCISelDAGToDAG.cpp | 47 -----------------------------------------------
PPCInstrInfo.td | 34 ++++++++++++++++++++++++----------
2 files changed, 24 insertions(+), 57 deletions(-)
Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
diff -u llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.140 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.141
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp:1.140 Fri Dec 9 20:36:00 2005
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Wed Dec 14 16:54:33 2005
@@ -885,53 +885,6 @@
CurDAG->getTargetFrameIndex(FI, MVT::i32),
getI32Imm(0));
}
- case ISD::FADD: {
- MVT::ValueType Ty = N->getValueType(0);
- if (!NoExcessFPPrecision) { // Match FMA ops
- if (N->getOperand(0).getOpcode() == ISD::FMUL &&
- N->getOperand(0).Val->hasOneUse()) {
- ++FusedFP; // Statistic
- return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS,
- Ty, Select(N->getOperand(0).getOperand(0)),
- Select(N->getOperand(0).getOperand(1)),
- Select(N->getOperand(1)));
- } else if (N->getOperand(1).getOpcode() == ISD::FMUL &&
- N->getOperand(1).hasOneUse()) {
- ++FusedFP; // Statistic
- return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS,
- Ty, Select(N->getOperand(1).getOperand(0)),
- Select(N->getOperand(1).getOperand(1)),
- Select(N->getOperand(0)));
- }
- }
-
- // Other cases are autogenerated.
- break;
- }
- case ISD::FSUB: {
- MVT::ValueType Ty = N->getValueType(0);
-
- if (!NoExcessFPPrecision) { // Match FMA ops
- if (N->getOperand(0).getOpcode() == ISD::FMUL &&
- N->getOperand(0).Val->hasOneUse()) {
- ++FusedFP; // Statistic
- return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMSUB:PPC::FMSUBS,
- Ty, Select(N->getOperand(0).getOperand(0)),
- Select(N->getOperand(0).getOperand(1)),
- Select(N->getOperand(1)));
- } else if (N->getOperand(1).getOpcode() == ISD::FMUL &&
- N->getOperand(1).Val->hasOneUse()) {
- ++FusedFP; // Statistic
- return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ?PPC::FNMSUB:PPC::FNMSUBS,
- Ty, Select(N->getOperand(1).getOperand(0)),
- Select(N->getOperand(1).getOperand(1)),
- Select(N->getOperand(0)));
- }
- }
-
- // Other cases are autogenerated.
- break;
- }
case ISD::SDIV: {
// FIXME: since this depends on the setting of the carry flag from the srawi
// we should really be making notes about that for the scheduler.
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
diff -u llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.158 llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.159
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td:1.158 Wed Dec 14 16:07:12 2005
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td Wed Dec 14 16:54:33 2005
@@ -168,7 +168,7 @@
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"NoExcessFPPrecision">;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
@@ -746,22 +746,26 @@
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
[(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>;
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
def FNMADDS : AForm_1<59, 31,
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>;
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
def FNMSUB : AForm_1<63, 30,
(ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
"fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
[(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC),
- F8RC:$FRB)))]>;
+ F8RC:$FRB)))]>,
+ Requires<[FPContractions]>;
def FNMSUBS : AForm_1<59, 30,
(ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
"fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
[(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC),
- F4RC:$FRB)))]>;
+ F4RC:$FRB)))]>,
+ Requires<[FPContractions]>;
// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
// having 4 of these, force the comparison to always be an 8-byte double (code
// should use an FMRSD if the input comparison value really wants to be a float)
@@ -848,12 +852,14 @@
def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
"vmaddfp $vD, $vA, $vC, $vB", VecFP,
[(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC),
- VRRC:$vB))]>;
+ VRRC:$vB))]>,
+ Requires<[FPContractions]>;
def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC),
- "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
- [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA,
- VRRC:$vC),
- VRRC:$vB)))]>;
+ "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
+ [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA,
+ VRRC:$vC),
+ VRRC:$vB)))]>,
+ Requires<[FPContractions]>;
// VX-Form instructions. AltiVec arithmetic ops.
def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB),
@@ -971,6 +977,14 @@
def : Pat<(fmul VRRC:$vA, VRRC:$vB),
(VMADDFP VRRC:$vA, (V_SET0), VRRC:$vB)>;
+// Fused negative multiply subtract, alternate pattern
+def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)),
+ (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>,
+ Requires<[FPContractions]>;
+def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)),
+ (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>,
+ Requires<[FPContractions]>;
+
// Fused multiply add and multiply sub for packed float. These are represented
// separately from the real instructions above, for operations that must have
// the additional precision, such as Newton-Raphson (used by divide, sqrt)
More information about the llvm-commits mailing list