[llvm] r224094 - R600: Fix min/max matching problems with unordered compares
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Dec 11 18:30:38 PST 2014
Author: arsenm
Date: Thu Dec 11 20:30:37 2014
New Revision: 224094
URL: http://llvm.org/viewvc/llvm-project?rev=224094&view=rev
Log:
R600: Fix min/max matching problems with unordered compares
The returned operand needs to be permuted for the unordered
compares. Also fix incorrectly producing fmin_legacy / fmax_legacy
for f64, which don't exist.
Added:
llvm/trunk/test/CodeGen/R600/fmax_legacy.f64.ll
llvm/trunk/test/CodeGen/R600/fmin_legacy.f64.ll
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
llvm/trunk/lib/Target/R600/SIInstructions.td
llvm/trunk/test/CodeGen/R600/fmax_legacy.ll
llvm/trunk/test/CodeGen/R600/fmin_legacy.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Thu Dec 11 20:30:37 2014
@@ -1038,17 +1038,21 @@ SDValue AMDGPUTargetLowering::LowerIntri
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const {
+ if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return SDValue();
+
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
case ISD::SETOEQ:
@@ -1065,33 +1069,51 @@ SDValue AMDGPUTargetLowering::CombineFMi
case ISD::SETO:
break;
case ISD::SETULE:
- case ISD::SETULT:
+ case ISD::SETULT: {
+ // Unordered.
+ //
+ // We will allow this before legalization since we expand unordered compares
+ // ordinarily.
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ }
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- break;
+ // Ordered. Assume ordered for undefined.
+
+ // Only do this after legalization to avoid interfering with other combines
+ // which might occur.
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
// We need to permute the operands to get the correct NaN behavior. The
// selected operand is the second one based on the failing compare with NaN,
// so permute it based on the compare type the hardware uses.
if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ }
+ case ISD::SETUGE:
+ case ISD::SETUGT: {
+ if (LHS == True)
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
- case ISD::SETUGE:
case ISD::SETOGE:
- case ISD::SETUGT:
case ISD::SETOGT: {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- break;
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
+ !DCI.isCalledByLegalizer())
+ return SDValue();
if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
- return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
+ return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
@@ -2276,24 +2298,6 @@ SDValue AMDGPUTargetLowering::PerformDAG
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT_CC: {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::f32 ||
- (VT == MVT::f64 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- SDValue CC = N->getOperand(4);
-
- return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
-
- break;
- }
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() == ISD::SETCC) {
@@ -2306,11 +2310,8 @@ SDValue AMDGPUTargetLowering::PerformDAG
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
- if (VT == MVT::f32 ||
- (VT == MVT::f64 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
- return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
+ if (VT == MVT::f32)
+ return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
// TODO: Implement min / max Evergreen instructions.
if (VT == MVT::i32 &&
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h Thu Dec 11 20:30:37 2014
@@ -145,14 +145,14 @@ public:
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue CombineFMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const;
+ SDValue CombineFMinMaxLegacy(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const;
SDValue CombineIMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
Modified: llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ISelLowering.cpp?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ISelLowering.cpp Thu Dec 11 20:30:37 2014
@@ -1118,6 +1118,13 @@ SDValue R600TargetLowering::LowerSELECT_
SDValue CC = Op.getOperand(4);
SDValue Temp;
+ if (VT == MVT::f32) {
+ DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
+ SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+ if (MinMax)
+ return MinMax;
+ }
+
// LHS and RHS are guaranteed to be the same value type
EVT CompareVT = LHS.getValueType();
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Thu Dec 11 20:30:37 2014
@@ -1514,6 +1514,7 @@ let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
VOP_F32_F32_F32
>;
+} // End isCommutable = 1
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmin_legacy
@@ -1522,6 +1523,7 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0
VOP_F32_F32_F32, AMDGPUfmax_legacy
>;
+let isCommutable = 1 in {
defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
VOP_I32_I32_I32, sra
Added: llvm/trunk/test/CodeGen/R600/fmax_legacy.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmax_legacy.f64.ll?rev=224094&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmax_legacy.f64.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmax_legacy.f64.ll Thu Dec 11 20:30:37 2014
@@ -0,0 +1,67 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; Make sure we don't try to form FMAX_LEGACY nodes with f64
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f64
+define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp uge double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f64
+define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp oge double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f64
+define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ugt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f64
+define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ogt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/R600/fmax_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmax_legacy.ll?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmax_legacy.ll (original)
+++ llvm/trunk/test/CodeGen/R600/fmax_legacy.ll Thu Dec 11 20:30:37 2014
@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: @test_fmax_legacy_uge_f32
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; EG: MAX
define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
@@ -44,7 +44,7 @@ define void @test_fmax_legacy_oge_f32(fl
; FUNC-LABEL: @test_fmax_legacy_ugt_f32
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
; EG: MAX
define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
Added: llvm/trunk/test/CodeGen/R600/fmin_legacy.f64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmin_legacy.f64.ll?rev=224094&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmin_legacy.f64.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmin_legacy.f64.ll Thu Dec 11 20:30:37 2014
@@ -0,0 +1,77 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmin_legacy_f64
+define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 {
+ %r0 = extractelement <4 x double> %reg0, i32 0
+ %r1 = extractelement <4 x double> %reg0, i32 1
+ %r2 = fcmp uge double %r0, %r1
+ %r3 = select i1 %r2, double %r1, double %r0
+ %vec = insertelement <4 x double> undef, double %r3, i32 0
+ store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f64
+define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ule double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f64
+define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ole double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f64
+define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp olt double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f64
+define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+ %a = load double addrspace(1)* %gep.0, align 8
+ %b = load double addrspace(1)* %gep.1, align 8
+
+ %cmp = fcmp ult double %a, %b
+ %val = select i1 %cmp, double %a, double %b
+ store double %val, double addrspace(1)* %out, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
Modified: llvm/trunk/test/CodeGen/R600/fmin_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmin_legacy.ll?rev=224094&r1=224093&r2=224094&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmin_legacy.ll (original)
+++ llvm/trunk/test/CodeGen/R600/fmin_legacy.ll Thu Dec 11 20:30:37 2014
@@ -19,7 +19,7 @@ define void @test_fmin_legacy_f32(<4 x f
; FUNC-LABEL: @test_fmin_legacy_ule_f32
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
@@ -73,7 +73,7 @@ define void @test_fmin_legacy_olt_f32(fl
; FUNC-LABEL: @test_fmin_legacy_ult_f32
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
More information about the llvm-commits
mailing list