[llvm] r259089 - AMDGPU: Match some med3 patterns
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 12:53:43 PST 2016
Author: arsenm
Date: Thu Jan 28 14:53:42 2016
New Revision: 259089
URL: http://llvm.org/viewvc/llvm-project?rev=259089&view=rev
Log:
AMDGPU: Match some med3 patterns
Added:
llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
llvm/trunk/test/CodeGen/AMDGPU/smed3.ll
llvm/trunk/test/CodeGen/AMDGPU/umed3.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Thu Jan 28 14:53:42 2016
@@ -169,6 +169,12 @@ def FeatureFP64Denormals : SubtargetFeat
[FeatureFP64]
>;
+def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
+ "FPExceptions",
+ "true",
+ "Enable floating point exceptions"
+>;
+
def FeatureEnableHugeScratchBuffer : SubtargetFeature<
"huge-scratch-buffer",
"EnableHugeScratchBuffer",
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Thu Jan 28 14:53:42 2016
@@ -397,7 +397,7 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
// SI at least has hardware support for floating point exceptions, but no way
// of using or handling them is implemented. They are also optional in OpenCL
// (Section 7.3)
- setHasFloatingPointExceptions(false);
+ setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
@@ -2949,6 +2949,9 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(FMIN3)
NODE_NAME_CASE(SMIN3)
NODE_NAME_CASE(UMIN3)
+ NODE_NAME_CASE(FMED3)
+ NODE_NAME_CASE(SMED3)
+ NODE_NAME_CASE(UMED3)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Thu Jan 28 14:53:42 2016
@@ -257,6 +257,9 @@ enum NodeType : unsigned {
FMIN3,
SMIN3,
UMIN3,
+ FMED3,
+ SMED3,
+ UMED3,
URECIP,
DIV_SCALE,
DIV_FMAS,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Thu Jan 28 14:53:42 2016
@@ -209,6 +209,16 @@ def AMDGPUmad_i24 : SDNode<"AMDGPUISD::M
[]
>;
+def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp,
+ []
+>;
+
+def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
+ []
+>;
+
+def AMDGPUfmed3 : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;
+
def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Thu Jan 28 14:53:42 2016
@@ -66,9 +66,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
: AMDGPUGenSubtargetInfo(TT, GPU, FS),
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
- FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
- HalfRate64Ops(false), CaymanISA(false), FlatAddressSpace(false),
- FlatForGlobal(false), EnableIRStructurizer(true),
+ FP64Denormals(false), FP32Denormals(false), FPExceptions(false),
+ FastFMAF32(false), HalfRate64Ops(false), CaymanISA(false),
+ FlatAddressSpace(false), FlatForGlobal(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false),
EnableIfCvt(true), EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Thu Jan 28 14:53:42 2016
@@ -66,6 +66,7 @@ private:
bool FP64;
bool FP64Denormals;
bool FP32Denormals;
+ bool FPExceptions;
bool FastFMAF32;
bool HalfRate64Ops;
bool CaymanISA;
@@ -150,6 +151,10 @@ public:
return FP64Denormals;
}
+ bool hasFPExceptions() const { // Reports the FPExceptions flag: false by default in the constructor, set to true by the "fp-exceptions" subtarget feature.
+ return FPExceptions;
+ }
+
bool hasFastFMAF32() const {
return FastFMAF32;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jan 28 14:53:42 2016
@@ -2131,8 +2131,70 @@ static unsigned minMaxOpcToMin3Max3Opc(u
}
}
-SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, // min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1) for integer constants.
+ SDLoc SL, // Location attached to the new node.
+ SDValue Op0, // Inner node: operand 0 is x, operand 1 is the candidate K0.
+ SDValue Op1, // Candidate K1.
+ bool Signed) { // true: signed compare and SMED3; false: unsigned compare and UMED3.
+ ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
+ if (!K1)
+ return SDValue(); // An empty SDValue tells the caller that nothing was folded.
+ // K0 must likewise be a constant; it is taken from operand 1 of the inner node.
+ ConstantSDNode *K0 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (!K0)
+ return SDValue();
+ // Give up unless K0 < K1 in the requested signedness: once K0 > K1 the
+ // min/max chain always yields K1, which need not be the median of the three.
+ if (Signed) {
+ if (K0->getAPIntValue().sge(K1->getAPIntValue()))
+ return SDValue();
+ } else {
+ if (K0->getAPIntValue().uge(K1->getAPIntValue()))
+ return SDValue();
+ }
+ // The result uses K0's value type; the med3 operands are (x, K0, K1).
+ EVT VT = K0->getValueType(0);
+ return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
+ Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+}
+
+static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { // True when Op may be treated as never being a signaling NaN.
+ if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) // The target reports no FP exceptions: answer true without inspecting Op.
+ return true;
+ // Conservative fallback: this asks for "never any NaN", which is stronger than "never a signaling NaN".
+ return DAG.isKnownNeverNaN(Op);
+}
+
+static SDValue performFPMed3ImmCombine(SelectionDAG &DAG, // fminnum(fmaxnum(x, K0), K1) -> fmed3(x, K0, K1) for FP constants.
+ SDLoc SL, // Location attached to the new node.
+ SDValue Op0, // Inner node: operand 0 is x, operand 1 is the candidate K0.
+ SDValue Op1) { // Candidate K1.
+ ConstantFPSDNode *K1 = dyn_cast<ConstantFPSDNode>(Op1);
+ if (!K1)
+ return SDValue(); // An empty SDValue tells the caller that nothing was folded.
+ // K0 must likewise be an FP constant; it is taken from operand 1 of the inner node.
+ ConstantFPSDNode *K0 = dyn_cast<ConstantFPSDNode>(Op0.getOperand(1));
+ if (!K0)
+ return SDValue();
+ // Bail only when K0 > K1. Equal constants still fold, and an unordered result
+ // is not rejected here (NaN constants should have folded away by now).
+ APFloat::cmpResult Cmp = K0->getValueAPF().compare(K1->getValueAPF());
+ if (Cmp == APFloat::cmpGreaterThan)
+ return SDValue();
+
+ // This isn't safe with signaling NaNs because in IEEE mode, min/max on a
+ // signaling NaN gives a quiet NaN. The quiet NaN input to the min would then
+ // give the other result, which is different from med3 with a NaN input.
+ SDValue Var = Op0.getOperand(0);
+ if (!isKnownNeverSNan(DAG, Var))
+ return SDValue();
+ // The result uses K0's value type; the med3 operands are (x, K0, K1).
+ return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
+ Var, SDValue(K0, 0), SDValue(K1, 0));
+}
+
+SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
unsigned Opc = N->getOpcode();
@@ -2142,7 +2204,8 @@ SDValue SITargetLowering::performMin3Max
// Only do this if the inner op has one use, since otherwise this would just
// increase register pressure for no benefit.
- // max(max(a, b), c)
+ // max(max(a, b), c) -> max3(a, b, c)
+ // min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
@@ -2153,7 +2216,9 @@ SDValue SITargetLowering::performMin3Max
Op1);
}
- // max(a, max(b, c))
+ // Try commuted.
+ // max(a, max(b, c)) -> max3(a, b, c)
+ // min(a, min(b, c)) -> min3(a, b, c)
if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
@@ -2164,6 +2229,24 @@ SDValue SITargetLowering::performMin3Max
Op1.getOperand(1));
}
+ // min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
+ if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {
+ if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
+ return Med3;
+ }
+
+ if (Opc == ISD::UMIN && Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
+ if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, false))
+ return Med3;
+ }
+
+ // fminnum(fmaxnum(x, K0), K1), K0 <= K1 && !is_snan(x) -> fmed3(x, K0, K1)
+ if (Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM &&
+ N->getValueType(0) == MVT::f32 && Op0.hasOneUse()) {
+ if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
+ return Res;
+ }
+
return SDValue();
}
@@ -2217,7 +2300,7 @@ SDValue SITargetLowering::PerformDAGComb
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return performMin3Max3Combine(N, DCI);
+ return performMinMaxCombine(N, DCI);
break;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Thu Jan 28 14:53:42 2016
@@ -54,7 +54,8 @@ class SITargetLowering : public AMDGPUTa
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=259089&r1=259088&r2=259089&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Jan 28 14:53:42 2016
@@ -1695,13 +1695,13 @@ defm V_MAX3_U32 : VOP3Inst <vop3<0x156,
VOP_I32_I32_I32_I32, AMDGPUumax3
>;
defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
- VOP_F32_F32_F32_F32
+ VOP_F32_F32_F32_F32, AMDGPUfmed3
>;
defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
- VOP_I32_I32_I32_I32
+ VOP_I32_I32_I32_I32, AMDGPUsmed3
>;
defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
- VOP_I32_I32_I32_I32
+ VOP_I32_I32_I32_I32, AMDGPUumed3
>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
Added: llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll?rev=259089&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmed3.ll Thu Jan 28 14:53:42 2016
@@ -0,0 +1,131 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=NOSNAN -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mattr=+fp-exceptions -verify-machineinstrs < %s | FileCheck -check-prefix=SNAN -check-prefix=GCN %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare float @llvm.minnum.f32(float, float) #0
+declare float @llvm.maxnum.f32(float, float) #0
+declare double @llvm.minnum.f64(double, double) #0
+declare double @llvm.maxnum.f64(double, double) #0
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f32:
+; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
+
+; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+define void @v_test_fmed3_r_i_i_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call float @llvm.minnum.f32(float %max, float 4.0)
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute0_f32:
+; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
+
+; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+define void @v_test_fmed3_r_i_i_commute0_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float 2.0, float %a)
+ %med = call float @llvm.minnum.f32(float 4.0, float %max)
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_commute1_f32:
+; NOSNAN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
+
+; SNAN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+; SNAN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+define void @v_test_fmed3_r_i_i_commute1_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call float @llvm.minnum.f32(float 4.0, float %max)
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_constant_order_f32:
+; GCN: v_max_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+; GCN: v_min_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+define void @v_test_fmed3_r_i_i_constant_order_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float %a, float 4.0)
+ %med = call float @llvm.minnum.f32(float %max, float 2.0)
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_multi_use_f32:
+; GCN: v_max_f32_e32 v{{[0-9]+}}, 2.0, v{{[0-9]+}}
+; GCN: v_min_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}
+define void @v_test_fmed3_r_i_i_multi_use_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call float @llvm.minnum.f32(float %max, float 4.0)
+
+ store volatile float %med, float addrspace(1)* %outgep
+ store volatile float %max, float addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_f64:
+; GCN: v_max_f64 {{v\[[0-9]+:[0-9]+\]}}, 2.0, {{v\[[0-9]+:[0-9]+\]}}
+; GCN: v_min_f64 {{v\[[0-9]+:[0-9]+\]}}, 4.0, {{v\[[0-9]+:[0-9]+\]}}
+define void @v_test_fmed3_r_i_i_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %a = load double, double addrspace(1)* %gep0
+
+ %max = call double @llvm.maxnum.f64(double %a, double 2.0)
+ %med = call double @llvm.minnum.f64(double %max, double 4.0)
+
+ store double %med, double addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_fmed3_r_i_i_no_nans_f32:
+; GCN: v_med3_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2.0, 4.0
+define void @v_test_fmed3_r_i_i_no_nans_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep0
+
+ %max = call float @llvm.maxnum.f32(float %a, float 2.0)
+ %med = call float @llvm.minnum.f32(float %max, float 4.0)
+
+ store float %med, float addrspace(1)* %outgep
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
+attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
Added: llvm/trunk/test/CodeGen/AMDGPU/smed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/smed3.ll?rev=259089&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smed3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/smed3.ll Thu Jan 28 14:53:42 2016
@@ -0,0 +1,120 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
+; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
+define void @v_test_smed3_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp slt i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_smed3_multi_use_r_i_i_i32:
+; GCN: v_max_i32
+; GCN: v_min_i32
+define void @v_test_smed3_multi_use_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp slt i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store volatile i32 %i0, i32 addrspace(1)* %outgep
+ store volatile i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_smed3_r_i_i_constant_order_i32:
+; GCN: v_max_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
+; GCN: v_min_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
+define void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i32 %a, 17
+ %i0 = select i1 %icmp0, i32 %a, i32 17
+
+ %icmp1 = icmp slt i32 %i0, 12
+ %i1 = select i1 %icmp1, i32 %i0, i32 12
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32:
+; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
+; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
+define void @v_test_smed3_r_i_i_sign_mismatch_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp slt i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i64:
+; GCN: v_cmp_lt_i64
+; GCN: v_cmp_gt_i64
+define void @v_test_smed3_r_i_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i64 %a, 12
+ %i0 = select i1 %icmp0, i64 %a, i64 12
+
+ %icmp1 = icmp slt i64 %i0, 17
+ %i1 = select i1 %icmp1, i64 %i0, i64 17
+
+ store i64 %i1, i64 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
+; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
+define void @v_test_smed3_r_i_i_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i16 %a, 12
+ %i0 = select i1 %icmp0, i16 %a, i16 12
+
+ %icmp1 = icmp slt i16 %i0, 17
+ %i1 = select i1 %icmp1, i16 %i0, i16 17
+
+ store i16 %i1, i16 addrspace(1)* %outgep
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Added: llvm/trunk/test/CodeGen/AMDGPU/umed3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/umed3.ll?rev=259089&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/umed3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/umed3.ll Thu Jan 28 14:53:42 2016
@@ -0,0 +1,119 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i32:
+; GCN: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
+define void @v_test_umed3_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp ult i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umed3_multi_use_r_i_i_i32:
+; GCN: v_max_u32
+; GCN: v_min_u32
+define void @v_test_umed3_multi_use_r_i_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp ult i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store volatile i32 %i0, i32 addrspace(1)* %outgep
+ store volatile i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32:
+; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
+; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
+define void @v_test_umed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i32 %a, 17
+ %i0 = select i1 %icmp0, i32 %a, i32 17
+
+ %icmp1 = icmp ult i32 %i0, 12
+ %i1 = select i1 %icmp1, i32 %i0, i32 12
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32:
+; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
+; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
+define void @v_test_umed3_r_i_i_sign_mismatch_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0
+
+ %icmp0 = icmp sgt i32 %a, 12
+ %i0 = select i1 %icmp0, i32 %a, i32 12
+
+ %icmp1 = icmp ult i32 %i0, 17
+ %i1 = select i1 %icmp1, i32 %i0, i32 17
+
+ store i32 %i1, i32 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i64:
+; GCN: v_cmp_lt_u64
+; GCN: v_cmp_gt_u64
+define void @v_test_umed3_r_i_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i64 %a, 12
+ %i0 = select i1 %icmp0, i64 %a, i64 12
+
+ %icmp1 = icmp ult i64 %i0, 17
+ %i1 = select i1 %icmp1, i64 %i0, i64 17
+
+ store i64 %i1, i64 addrspace(1)* %outgep
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i16:
+; GCN: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
+define void @v_test_umed3_r_i_i_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %gep0 = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
+ %outgep = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %a = load i16, i16 addrspace(1)* %gep0
+
+ %icmp0 = icmp ugt i16 %a, 12
+ %i0 = select i1 %icmp0, i16 %a, i16 12
+
+ %icmp1 = icmp ult i16 %i0, 17
+ %i1 = select i1 %icmp1, i16 %i0, i16 17
+
+ store i16 %i1, i16 addrspace(1)* %outgep
+ ret void
+}
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
More information about the llvm-commits
mailing list