[llvm] r221941 - R600/SI: Fix fmin_legacy / fmax_legacy matching for SI
Matt Arsenault
Matthew.Arsenault at amd.com
Thu Nov 13 15:03:10 PST 2014
Author: arsenm
Date: Thu Nov 13 17:03:09 2014
New Revision: 221941
URL: http://llvm.org/viewvc/llvm-project?rev=221941&view=rev
Log:
R600/SI: Fix fmin_legacy / fmax_legacy matching for SI
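select_cc is expanded on SI, so the existing select_cc combine was never matched there. Also run the min/max combine on a select whose condition is a setcc.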
Added:
llvm/trunk/test/CodeGen/R600/fmax_legacy.ll
llvm/trunk/test/CodeGen/R600/fmin_legacy.ll
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/R600/R600Instructions.td
llvm/trunk/lib/Target/R600/SIInstructions.td
llvm/trunk/test/CodeGen/R600/fcmp64.ll
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Thu Nov 13 17:03:09 2014
@@ -378,6 +378,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
@@ -999,21 +1000,21 @@ SDValue AMDGPUTargetLowering::LowerIntri
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
+SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
SelectionDAG &DAG) const {
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- SDValue True = N->getOperand(2);
- SDValue False = N->getOperand(3);
- SDValue CC = N->getOperand(4);
+ if (VT != MVT::f32 &&
+ (VT != MVT::f64 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS))
+ return SDValue();
- if (VT != MVT::f32 ||
- !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
- }
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
@@ -1029,14 +1030,15 @@ SDValue AMDGPUTargetLowering::CombineMin
case ISD::SETTRUE2:
case ISD::SETUO:
case ISD::SETO:
- llvm_unreachable("Operation should already be optimised!");
+ break;
case ISD::SETULE:
case ISD::SETULT:
case ISD::SETOLE:
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX;
+ unsigned Opc
+ = (LHS == True) ? AMDGPUISD::FMIN_LEGACY : AMDGPUISD::FMAX_LEGACY;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETGT:
@@ -1045,7 +1047,8 @@ SDValue AMDGPUTargetLowering::CombineMin
case ISD::SETOGE:
case ISD::SETUGT:
case ISD::SETOGT: {
- unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN;
+ unsigned Opc
+ = (LHS == True) ? AMDGPUISD::FMAX_LEGACY : AMDGPUISD::FMIN_LEGACY;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
@@ -2110,9 +2113,37 @@ SDValue AMDGPUTargetLowering::PerformDAG
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT_CC: {
- return CombineMinMax(N, DAG);
+ case ISD::SELECT_CC: {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ SDValue CC = N->getOperand(4);
+
+ return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+ }
+ case ISD::SELECT: {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+
+ return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
}
+
+ break;
+ }
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -2289,10 +2320,10 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(CLAMP)
NODE_NAME_CASE(MAD)
- NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(FMAX_LEGACY)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
- NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(FMIN_LEGACY)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
NODE_NAME_CASE(URECIP)
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h Thu Nov 13 17:03:09 2014
@@ -140,7 +140,14 @@ public:
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const;
+ SDValue CombineMinMax(SDLoc DL,
+ EVT VT,
+ SDValue LHS,
+ SDValue RHS,
+ SDValue True,
+ SDValue False,
+ SDValue CC,
+ SelectionDAG &DAG) const;
const char* getTargetNodeName(unsigned Opcode) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
@@ -188,10 +195,10 @@ enum {
// Denormals handled on some parts.
COS_HW,
SIN_HW,
- FMAX,
+ FMAX_LEGACY,
SMAX,
UMAX,
- FMIN,
+ FMIN_LEGACY,
SMIN,
UMIN,
URECIP,
Modified: llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td Thu Nov 13 17:03:09 2014
@@ -58,9 +58,12 @@ def AMDGPUrsq_clamped : SDNode<"AMDGPUIS
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
-// out = max(a, b) a and b are floats
-def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+// out = max(a, b) a and b are floats, where a nan comparison fails.
+// This is not commutative because this gives the second operand:
+// x < nan ? x : nan -> nan
+// nan < x ? nan : x -> x
+def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
+ [SDNPAssociative]
>;
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
@@ -76,9 +79,9 @@ def AMDGPUumax : SDNode<"AMDGPUISD::UMAX
[SDNPCommutative, SDNPAssociative]
>;
-// out = min(a, b) a and b are floats
-def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
+// out = min(a, b) a and b are floats, where a nan comparison fails.
+def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
+ [SDNPAssociative]
>;
// out = min(a, b) a snd b are signed ints
@@ -137,7 +140,7 @@ def AMDGPUregister_store : SDNode<"AMDGP
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
-// src1: dst - rat offset (aka pointer) in dwords
+// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
SDTypeProfile<0, 2, []>,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
Modified: llvm/trunk/lib/Target/R600/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/R600/R600Instructions.td Thu Nov 13 17:03:09 2014
@@ -674,8 +674,9 @@ def ADD : R600_2OP_Helper <0x0, "ADD", f
// Non-IEEE MUL: 0 * anything = 0
def MUL : R600_2OP_Helper <0x1, "MUL NON-IEEE", int_AMDGPU_mul>;
def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
-def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax>;
-def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
+// TODO: Do these actually match the regular fmin/fmax behavior?
+def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
+def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Thu Nov 13 17:03:09 2014
@@ -1398,11 +1398,11 @@ defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb>
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmin
+ VOP_F32_F32_F32, AMDGPUfmin_legacy
>;
defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmax
+ VOP_F32_F32_F32, AMDGPUfmax_legacy
>;
defm V_MIN_F32 : VOP2Inst <vop2<0xf>, "v_min_f32", VOP_F32_F32_F32, fminnum>;
Modified: llvm/trunk/test/CodeGen/R600/fcmp64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fcmp64.ll?rev=221941&r1=221940&r2=221941&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fcmp64.ll (original)
+++ llvm/trunk/test/CodeGen/R600/fcmp64.ll Thu Nov 13 17:03:09 2014
@@ -1,60 +1,55 @@
; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-; CHECK: {{^}}flt_f64:
+; CHECK-LABEL: {{^}}flt_f64:
; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ult double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: {{^}}fle_f64:
+; CHECK-LABEL: {{^}}fle_f64:
; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ule double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: {{^}}fgt_f64:
+; CHECK-LABEL: {{^}}fgt_f64:
; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp ugt double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: {{^}}fge_f64:
+; CHECK-LABEL: {{^}}fge_f64:
; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
%r1 = load double addrspace(1)* %in2
%r2 = fcmp uge double %r0, %r1
- %r3 = select i1 %r2, double %r0, double %r1
- store double %r3, double addrspace(1)* %out
+ %r3 = zext i1 %r2 to i32
+ store i32 %r3, i32 addrspace(1)* %out
ret void
}
-; CHECK: {{^}}fne_f64:
+; CHECK-LABEL: {{^}}fne_f64:
; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
@@ -65,9 +60,8 @@ define void @fne_f64(double addrspace(1)
ret void
}
-; CHECK: {{^}}feq_f64:
+; CHECK-LABEL: {{^}}feq_f64:
; CHECK: v_cmp_eq_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
%r0 = load double addrspace(1)* %in1
Added: llvm/trunk/test/CodeGen/R600/fmax_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmax_legacy.ll?rev=221941&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmax_legacy.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmax_legacy.ll Thu Nov 13 17:03:09 2014
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f32
+; SI: v_max_legacy_f32_e32
+; EG: MAX
+define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp uge float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f32
+; SI: v_max_legacy_f32_e32
+; EG: MAX
+define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp oge float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f32
+; SI: v_max_legacy_f32_e32
+; EG: MAX
+define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp ugt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f32
+; SI: v_max_legacy_f32_e32
+; EG: MAX
+define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp ogt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
Added: llvm/trunk/test/CodeGen/R600/fmin_legacy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/fmin_legacy.ll?rev=221941&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/R600/fmin_legacy.ll (added)
+++ llvm/trunk/test/CodeGen/R600/fmin_legacy.ll Thu Nov 13 17:03:09 2014
@@ -0,0 +1,51 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: @test_fmin_legacy_f32
+; EG: MIN *
+; SI: v_min_legacy_f32_e32
+define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) nounwind {
+ %r0 = extractelement <4 x float> %reg0, i32 0
+ %r1 = extractelement <4 x float> %reg0, i32 1
+ %r2 = fcmp uge float %r0, %r1
+ %r3 = select i1 %r2, float %r1, float %r0
+ %vec = insertelement <4 x float> undef, float %r3, i32 0
+ store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f32
+; SI: v_min_legacy_f32_e32
+define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp ule float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f32
+; SI: v_min_legacy_f32_e32
+define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp ole float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f32
+; SI: v_min_legacy_f32_e32
+define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp olt float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f32
+; SI: v_min_legacy_f32_e32
+define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %cmp = fcmp ult float %a, %b
+ %val = select i1 %cmp, float %a, float %b
+ store float %val, float addrspace(1)* %out, align 4
+ ret void
+}
More information about the llvm-commits mailing list