[llvm] r259090 - AMDGPU: Match fmed3 patterns with legacy fmin/fmax
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 12:53:48 PST 2016
Author: arsenm
Date: Thu Jan 28 14:53:48 2016
New Revision: 259090
URL: http://llvm.org/viewvc/llvm-project?rev=259090&view=rev
Log:
AMDGPU: Match fmed3 patterns with legacy fmin/fmax
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=259090&r1=259089&r2=259090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Thu Jan 28 14:53:48 2016
@@ -2705,8 +2705,13 @@ SDValue AMDGPUTargetLowering::performSel
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
- if (VT == MVT::f32 && Cond.hasOneUse())
- return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+ if (VT == MVT::f32 && Cond.hasOneUse()) {
+ SDValue MinMax
+ = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+ // TODO: Revisit this node to catch min3/max3/med3 patterns (re-add disabled).
+ //DCI.AddToWorklist(MinMax.getNode());
+ return MinMax;
+ }
// There's no reason to not do this if the condition has other uses.
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=259090&r1=259089&r2=259090&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jan 28 14:53:48 2016
@@ -2204,29 +2204,31 @@ SDValue SITargetLowering::performMinMaxC
// Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
- // max(max(a, b), c) -> max3(a, b, c)
- // min(min(a, b), c) -> min3(a, b, c)
- if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
- SDLoc DL(N);
- return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
- DL,
- N->getValueType(0),
- Op0.getOperand(0),
- Op0.getOperand(1),
- Op1);
- }
+ if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) {
+ // max(max(a, b), c) -> max3(a, b, c)
+ // min(min(a, b), c) -> min3(a, b, c)
+ if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0.getOperand(0),
+ Op0.getOperand(1),
+ Op1);
+ }
- // Try commuted.
- // max(a, max(b, c)) -> max3(a, b, c)
- // min(a, min(b, c)) -> min3(a, b, c)
- if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
- SDLoc DL(N);
- return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
- DL,
- N->getValueType(0),
- Op0,
- Op1.getOperand(0),
- Op1.getOperand(1));
+ // Try commuted.
+ // max(a, max(b, c)) -> max3(a, b, c)
+ // min(a, min(b, c)) -> min3(a, b, c)
+ if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
+ SDLoc DL(N);
+ return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
+ DL,
+ N->getValueType(0),
+ Op0,
+ Op1.getOperand(0),
+ Op1.getOperand(1));
+ }
}
// min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
@@ -2241,7 +2243,9 @@ SDValue SITargetLowering::performMinMaxC
}
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
- if (Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM &&
+ if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
+ (Opc == AMDGPUISD::FMIN_LEGACY &&
+ Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
N->getValueType(0) == MVT::f32 && Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
@@ -2291,12 +2295,14 @@ SDValue SITargetLowering::PerformDAGComb
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
- case ISD::FMAXNUM: // TODO: What about fmax_legacy?
+ case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
- case ISD::UMIN: {
+ case ISD::UMIN:
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll?rev=259090&r1=259089&r2=259090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll Thu Jan 28 14:53:48 2016
@@ -126,6 +126,29 @@ define void @v_test_fmed3_r_i_i_no_nans_
ret void
}
+; GCN-LABEL: {{^}}v_test_legacy_fmed3_r_i_i_f32:
+; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
+
+; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+define void @v_test_legacy_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ ; fmax_legacy
+ %cmp0 = fcmp ule float %a, 2.0
+ %max = select i1 %cmp0, float 2.0, float %a
+
+ ; fmin_legacy
+ %cmp1 = fcmp uge float %max, 4.0
+ %med = select i1 %cmp1, float 4.0, float %max
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
More information about the llvm-commits mailing list