[llvm-commits] [llvm] r96572 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h lib/Target/ARM/ARMInstrNEON.td test/CodeGen/ARM/neon_minmax.ll
Bob Wilson
bob.wilson at apple.com
Wed Feb 17 22:05:54 PST 2010
Author: bwilson
Date: Thu Feb 18 00:05:53 2010
New Revision: 96572
URL: http://llvm.org/viewvc/llvm-project?rev=96572&view=rev
Log:
Use NEON vmin/vmax instructions for floating-point selects.
Radar 7461718.
Added:
llvm/trunk/test/CodeGen/ARM/neon_minmax.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=96572&r1=96571&r2=96572&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Feb 18 00:05:53 2010
@@ -294,6 +294,7 @@
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
@@ -544,6 +545,8 @@
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
@@ -3856,23 +3859,97 @@
return SDValue();
}
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+ // An empty SDValue is returned whenever the select is left unchanged.
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+ // Operands: 0/1 = compared values, 2/3 = true/false results, 4 = cond code.
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0; // stays 0 when no max/min pattern is matched
+ bool IsReversed; // assigned on every path that reaches the switch below
+ if (LHS == CondLHS && RHS == CondRHS) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (LHS == CondRHS && RHS == CondLHS) {
+ IsReversed = true ; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+ // NOTE(review): no case below checks signed zeros -- confirm vmax/vmin agree.
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ // This can be vmin if we can prove that the LHS is not a NaN.
+ // (If either operand is NaN, the comparison will be false and the result
+ // will be the RHS, which matches vmin if RHS is the NaN.)
+ if (DAG.isKnownNeverNaN(LHS))
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+ // Unordered compares are true if either operand is NaN, selecting the LHS.
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // Likewise, for ULT/ULE we need to know that RHS is not a NaN.
+ if (DAG.isKnownNeverNaN(RHS))
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ // This can be vmax if we can prove that the LHS is not a NaN.
+ // (If either operand is NaN, the comparison will be false and the result
+ // will be the RHS, which matches vmax if RHS is the NaN.)
+ if (DAG.isKnownNeverNaN(LHS))
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ // As with ULT/ULE, a NaN operand makes these compares true, selecting LHS.
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // Likewise, for UGT/UGE we need to know that RHS is not a NaN.
+ if (DAG.isKnownNeverNaN(RHS))
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=96572&r1=96571&r2=96572&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu Feb 18 00:05:53 2010
@@ -131,7 +131,11 @@
VREV16, // reverse elements within 16-bit halfwords
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
- VTRN // transpose
+ VTRN, // transpose
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN
};
}
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=96572&r1=96571&r2=96572&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu Feb 18 00:05:53 2010
@@ -89,6 +89,11 @@
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>; // one f32 result, two operands of that same type
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; // shares the FMAX profile
+
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@@ -3023,6 +3028,20 @@
"vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
+// Vector Maximum used for single-precision FP (selected for the NEONfmax node)
+let neverHasSideEffects = 1 in
+def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmax", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmax, VMAXfd_sfp>;
+
+// Vector Minimum used for single-precision FP (selected for the NEONfmin node)
+let neverHasSideEffects = 1 in
+def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmin", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmin, VMINfd_sfp>;
+
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
Added: llvm/trunk/test/CodeGen/ARM/neon_minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/neon_minmax.ll?rev=96572&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/neon_minmax.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/neon_minmax.ll Thu Feb 18 00:05:53 2010
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+
+define float @fmin_ole(float %x) nounwind {
+;CHECK: fmin_ole:
+;CHECK: vmin.f32
+ %cond = fcmp ole float 1.0, %x
+ %min1 = select i1 %cond, float 1.0, float %x ; 1.0 <= x ? 1.0 : x, ordered compare
+ ret float %min1
+}
+
+define float @fmin_ult(float %x) nounwind {
+;CHECK: fmin_ult:
+;CHECK: vmin.f32
+ %cond = fcmp ult float %x, 1.0
+ %min1 = select i1 %cond, float %x, float 1.0 ; x < 1.0 ? x : 1.0, unordered compare
+ ret float %min1
+}
+
+define float @fmax_ogt(float %x) nounwind {
+;CHECK: fmax_ogt:
+;CHECK: vmax.f32
+ %cond = fcmp ogt float 1.0, %x
+ %max1 = select i1 %cond, float 1.0, float %x ; 1.0 > x ? 1.0 : x, ordered compare
+ ret float %max1
+}
+
+define float @fmax_uge(float %x) nounwind {
+;CHECK: fmax_uge:
+;CHECK: vmax.f32
+ %cond = fcmp uge float %x, 1.0
+ %max1 = select i1 %cond, float %x, float 1.0 ; x >= 1.0 ? x : 1.0, unordered compare
+ ret float %max1
+}
+
+define float @fmax_olt_reverse(float %x) nounwind {
+;CHECK: fmax_olt_reverse:
+;CHECK: vmax.f32
+ %cond = fcmp olt float %x, 1.0
+ %max1 = select i1 %cond, float 1.0, float %x ; x < 1.0 ? 1.0 : x, ordered compare (results swapped, so max)
+ ret float %max1
+}
+
+define float @fmax_ule_reverse(float %x) nounwind {
+;CHECK: fmax_ule_reverse:
+;CHECK: vmax.f32
+ %cond = fcmp ult float 1.0, %x ; NOTE(review): function name says "ule" but the predicate is "ult" -- confirm intent
+ %max1 = select i1 %cond, float %x, float 1.0 ; 1.0 < x ? x : 1.0, unordered compare (results swapped, so max)
+ ret float %max1
+}
+
+define float @fmin_oge_reverse(float %x) nounwind {
+;CHECK: fmin_oge_reverse:
+;CHECK: vmin.f32
+ %cond = fcmp oge float %x, 1.0
+ %min1 = select i1 %cond, float 1.0, float %x ; x >= 1.0 ? 1.0 : x, ordered compare (results swapped, so min)
+ ret float %min1
+}
+
+define float @fmin_ugt_reverse(float %x) nounwind {
+;CHECK: fmin_ugt_reverse:
+;CHECK: vmin.f32
+ %cond = fcmp ugt float 1.0, %x
+ %min1 = select i1 %cond, float %x, float 1.0 ; 1.0 > x ? x : 1.0, unordered compare (results swapped, so min)
+ ret float %min1
+}
More information about the llvm-commits
mailing list