[llvm] r236590 - [ARM] generate VMAXNM/VMINNM for a compare followed by a select, in safe math mode too
Artyom Skrobov
Artyom.Skrobov at arm.com
Wed May 6 04:44:11 PDT 2015
Author: askrobov
Date: Wed May 6 06:44:10 2015
New Revision: 236590
URL: http://llvm.org/viewvc/llvm-project?rev=236590&view=rev
Log:
[ARM] generate VMAXNM/VMINNM for a compare followed by a select, in safe math mode too
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/vminmaxnm.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=236590&r1=236589&r2=236590&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed May 6 06:44:10 2015
@@ -3377,12 +3377,6 @@ SDValue ARMTargetLowering::LowerSELECT(S
SelectTrue, SelectFalse, ISD::SETNE);
}
-static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
- if (CC == ISD::SETNE)
- return ISD::SETEQ;
- return ISD::getSetCCInverse(CC, true);
-}
-
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
@@ -3495,7 +3489,7 @@ SDValue ARMTargetLowering::LowerSELECT_C
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
- CC = getInverseCCForVSEL(CC);
+ CC = ISD::getSetCCInverse(CC, true);
std::swap(TrueVal, FalseVal);
}
}
@@ -3517,27 +3511,108 @@ SDValue ARMTargetLowering::LowerSELECT_C
// c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
// In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
- // We only do this transformation in UnsafeFPMath and for no-NaNs
- // comparisons, because signed zeros and NaNs are handled differently than
- // the original code sequence.
- // FIXME: There are more cases that can be transformed even with NaNs,
- // signed zeroes and safe math. E.g. in the following, the result will be
- // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too.
- // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0.
// FIXME: There is similar code that allows some extensions in
// AArch64TargetLowering::LowerSELECT_CC that should be shared with this
// code.
- if (getTargetMachine().Options.UnsafeFPMath) {
- if (LHS == TrueVal && RHS == FalseVal) {
- if (CC == ISD::SETGT || CC == ISD::SETGE)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETLT || CC == ISD::SETLE)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
- } else if (LHS == FalseVal && RHS == TrueVal) {
- if (CC == ISD::SETLT || CC == ISD::SETLE)
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETGT || CC == ISD::SETGE)
- return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ bool swapSides = false;
+ if (!getTargetMachine().Options.NoNaNsFPMath) {
+ // transformability may depend on which way around we compare
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ // the non-NaN should be RHS
+ swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
+ break;
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // the non-NaN should be LHS
+ swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
+ break;
+ }
+ }
+ swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
+ if (swapSides) {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ std::swap(LHS, RHS);
+ }
+ if (LHS == TrueVal && RHS == FalseVal) {
+ bool canTransform = true;
+ // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
+ if (!getTargetMachine().Options.UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ const ConstantFPSDNode *Zero;
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ // RHS must not be -0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
+ !Zero->isNegative();
+ break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ case ISD::SETGE:
+ // LHS must not be -0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
+ !Zero->isNegative();
+ break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ case ISD::SETLT:
+ // RHS must not be +0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
+ Zero->isNegative();
+ break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ case ISD::SETLE:
+ // LHS must not be +0
+ canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
+ Zero->isNegative();
+ break;
+ }
+ }
+ if (canTransform) {
+ // Note: If one of the elements in a pair is a number and the other
+ // element is NaN, the corresponding result element is the number.
+ // This is consistent with the IEEE 754-2008 standard.
+ // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ if (!DAG.isKnownNeverNaN(RHS))
+ break;
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
+ case ISD::SETULT:
+ case ISD::SETULE:
+ if (!DAG.isKnownNeverNaN(LHS))
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
+ }
}
}
Modified: llvm/trunk/test/CodeGen/ARM/vminmaxnm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vminmaxnm.ll?rev=236590&r1=236589&r2=236590&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vminmaxnm.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vminmaxnm.ll Wed May 6 06:44:10 2015
@@ -218,6 +218,291 @@ define double @fp-armv8_vmaxnm_ule_rev(d
ret double %cond
}
+; known non-NaNs
+
+define float @fp-armv8_vminnm_NNNo(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp olt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp olt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNole(double %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNole":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp ole double %a, 34.
+ %cond1 = select i1 %cmp1, double %a, double 34.
+ %cmp2 = fcmp ole double 56., %cond1
+ %cond2 = select i1 %cmp2, double 56., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNo_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo_rev":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ogt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp ogt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNoge_rev":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp oge double %a, 78.
+ %cond1 = select i1 %cmp1, double 78., double %a
+ %cmp2 = fcmp oge double 90., %cond1
+ %cond2 = select i1 %cmp2, double %cond1, double 90.
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ult float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ult float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNule(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNule":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ule float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp ule float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu_rev(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu_rev":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ugt float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ugt float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNuge_rev":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp uge double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp uge double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ogt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp ogt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNoge(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNoge":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp oge float %a, 34.
+ %cond1 = select i1 %cmp1, float %a, float 34.
+ %cmp2 = fcmp oge float 56., %cond1
+ %cond2 = select i1 %cmp2, float 56., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp olt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ole float %a, 78.
+ %cond1 = select i1 %cmp1, float 78., float %a
+ %cmp2 = fcmp ole float 90., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 90.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ugt float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ugt float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNuge(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNuge":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp uge float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp uge float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ult float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ult float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vmaxnm_NNNule_rev( double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
+; CHECK-FAST: vmaxnm.f64
+; CHECK-FAST: vmaxnm.f64
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
+; CHECK: vmaxnm.f64
+; CHECK-NOT: vmaxnm.f64
+ %cmp1 = fcmp ule double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp ule double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminmaxnm_0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, 0.
+ %cond1 = select i1 %cmp1, float %a, float 0.
+ %cmp2 = fcmp ogt float %cond1, 0.
+ %cond2 = select i1 %cmp2, float %cond1, float 0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_neg0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, -0.
+ %cond1 = select i1 %cmp1, float %a, float -0.
+ %cmp2 = fcmp ogt float %cond1, -0.
+ %cond2 = select i1 %cmp2, float %cond1, float -0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp ule float 0., %a
+ %cond1 = select i1 %cmp1, float 0., float %a
+ %cmp2 = fcmp uge float 0., %cond1
+ %cond2 = select i1 %cmp2, float 0., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ule float -0., %a
+ %cond1 = select i1 %cmp1, float -0., float %a
+ %cmp2 = fcmp uge float -0., %cond1
+ %cond2 = select i1 %cmp2, float -0., float %cond1
+ ret float %cond2
+}
declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
More information about the llvm-commits
mailing list