[llvm] r179000 - Cleanup and improve PPC fsel generation
Hal Finkel
hfinkel at anl.gov
Sun Apr 7 15:11:09 PDT 2013
Author: hfinkel
Date: Sun Apr 7 17:11:09 2013
New Revision: 179000
URL: http://llvm.org/viewvc/llvm-project?rev=179000&view=rev
Log:
Cleanup and improve PPC fsel generation
First, we should not cheat: fsel-based lowering of select_cc is a
finite-math-only optimization (the ISA manual, section F.3 of v2.06, makes
this clear, as does a note in our own README).
This also adds fsel-based lowering of EQ and NE condition codes. As it turned
out, fsel generation was covered by a grand total of zero regression test
cases. I've added some test cases to cover the existing behavior (which is now
finite-math-only), as well as the new EQ cases.
Added:
llvm/trunk/test/CodeGen/PowerPC/fsel.ll
Modified:
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=179000&r1=178999&r2=179000&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Sun Apr 7 17:11:09 2013
@@ -4673,10 +4673,14 @@ SDValue PPCTargetLowering::LowerSELECT_C
!Op.getOperand(2).getValueType().isFloatingPoint())
return Op;
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+ if (!DAG.getTarget().Options.NoInfsFPMath ||
+ !DAG.getTarget().Options.NoNaNsFPMath)
+ return Op;
- // Cannot handle SETEQ/SETNE.
- if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
EVT ResVT = Op.getValueType();
EVT CmpVT = Op.getOperand(0).getValueType();
@@ -4686,9 +4690,20 @@ SDValue PPCTargetLowering::LowerSELECT_C
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
+ SDValue Sel1;
if (isFloatingPointZero(RHS))
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
@@ -4711,30 +4726,41 @@ SDValue PPCTargetLowering::LowerSELECT_C
SDValue Cmp;
switch (CC) {
default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
- return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
}
return Op;
}
Added: llvm/trunk/test/CodeGen/PowerPC/fsel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fsel.ll?rev=179000&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fsel.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/fsel.ll Sun Apr 7 17:11:09 2013
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @zerocmp1(double %a, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp ult double %a, 0.000000e+00
+ %z.y = select i1 %cmp, double %z, double %y
+ ret double %z.y
+
+; CHECK: @zerocmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp1
+; CHECK-FM: fsel 1, 1, 2, 3
+; CHECK-FM: blr
+}
+
+define double @zerocmp2(double %a, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp ogt double %a, 0.000000e+00
+ %y.z = select i1 %cmp, double %y, double %z
+ ret double %y.z
+
+; CHECK: @zerocmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp2
+; CHECK-FM: fneg [[REG:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG]], 3, 2
+; CHECK-FM: blr
+}
+
+define double @zerocmp3(double %a, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp oeq double %a, 0.000000e+00
+ %y.z = select i1 %cmp, double %y, double %z
+ ret double %y.z
+
+; CHECK: @zerocmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp3
+; CHECK-FM: fsel [[REG:[0-9]+]], 1, 2, 3
+; CHECK-FM: fneg [[REG2:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3
+; CHECK-FM: blr
+}
+
+define double @min1(double %a, double %b) #0 {
+entry:
+ %cmp = fcmp ole double %a, %b
+ %cond = select i1 %cmp, double %a, double %b
+ ret double %cond
+
+; CHECK: @min1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @min1
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @max1(double %a, double %b) #0 {
+entry:
+ %cmp = fcmp oge double %a, %b
+ %cond = select i1 %cmp, double %a, double %b
+ ret double %cond
+
+; CHECK: @max1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @max1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @cmp1(double %a, double %b, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp ult double %a, %b
+ %z.y = select i1 %cmp, double %z, double %y
+ ret double %z.y
+
+; CHECK: @cmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 3, 4
+; CHECK-FM: blr
+}
+
+define double @cmp2(double %a, double %b, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp ogt double %a, %b
+ %y.z = select i1 %cmp, double %y, double %z
+ ret double %y.z
+
+; CHECK: @cmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp2
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 4, 3
+; CHECK-FM: blr
+}
+
+define double @cmp3(double %a, double %b, double %y, double %z) #0 {
+entry:
+ %cmp = fcmp oeq double %a, %b
+ %y.z = select i1 %cmp, double %y, double %z
+ ret double %y.z
+
+; CHECK: @cmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp3
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel [[REG2:[0-9]+]], [[REG]], 3, 4
+; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]]
+; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4
+; CHECK-FM: blr
+}
+
+attributes #0 = { nounwind readnone }
+
More information about the llvm-commits
mailing list