[llvm] r295908 - AMDGPU: Use clamp with f64
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 15:53:37 PST 2017
Author: arsenm
Date: Wed Feb 22 17:53:37 2017
New Revision: 295908
URL: http://llvm.org/viewvc/llvm-project?rev=295908&view=rev
Log:
AMDGPU: Use clamp with f64
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll
llvm/trunk/test/CodeGen/AMDGPU/clamp.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=295908&r1=295907&r2=295908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Wed Feb 22 17:53:37 2017
@@ -694,7 +694,8 @@ const MachineOperand *SIFoldOperands::is
unsigned Op = MI.getOpcode();
switch (Op) {
case AMDGPU::V_MAX_F32_e64:
- case AMDGPU::V_MAX_F16_e64: {
+ case AMDGPU::V_MAX_F16_e64:
+ case AMDGPU::V_MAX_F64: {
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
return nullptr;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=295908&r1=295907&r2=295908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Feb 22 17:53:37 2017
@@ -4055,7 +4055,8 @@ SDValue SITargetLowering::performFPMed3I
}
// No med3 for f16, but clamp is possible.
- if (VT == MVT::f16)
+ // TODO: gfx9 has med3 f16
+ if (VT == MVT::f16 || VT == MVT::f64)
return SDValue();
// This isn't safe with signaling NaNs because in IEEE mode, min/max on a
@@ -4073,6 +4074,7 @@ SDValue SITargetLowering::performMinMaxC
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -4080,7 +4082,9 @@ SDValue SITargetLowering::performMinMaxC
// Only do this if the inner op has one use since this will just increases
// register pressure for no benefit.
- if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) {
+
+ if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
+ VT != MVT::f64) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
@@ -4122,8 +4126,8 @@ SDValue SITargetLowering::performMinMaxC
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
- (N->getValueType(0) == MVT::f32 ||
- (N->getValueType(0) == MVT::f16 && Subtarget->has16BitInsts())) &&
+ (VT == MVT::f32 || VT == MVT::f64 ||
+ (VT == MVT::f16 && Subtarget->has16BitInsts())) &&
Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
@@ -4404,7 +4408,6 @@ SDValue SITargetLowering::PerformDAGComb
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMinMaxCombine(N, DCI);
break;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=295908&r1=295907&r2=295908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Wed Feb 22 17:53:37 2017
@@ -657,8 +657,8 @@ class ClampPat<Instruction inst, ValueTy
i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
>;
-// TODO: Does f64 support clamp?
def : ClampPat<V_MAX_F32_e64, f32>;
+def : ClampPat<V_MAX_F64, f64>;
def : ClampPat<V_MAX_F16_e64, f16>;
/********** ================================ **********/
Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll?rev=295908&r1=295907&r2=295908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp-modifier.ll Wed Feb 22 17:53:37 2017
@@ -153,6 +153,21 @@ define amdgpu_kernel void @v_clamp_add_s
ret void
}
+; GCN-LABEL: {{^}}v_clamp_add_src_f64:
+; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], 1.0 clamp{{$}}
+define amdgpu_kernel void @v_clamp_add_src_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %a = load double, double addrspace(1)* %gep0
+ %add = fadd double %a, 1.0
+ %max = call double @llvm.maxnum.f64(double %add, double 0.0)
+ %clamp = call double @llvm.minnum.f64(double %max, double 1.0)
+ store double %clamp, double addrspace(1)* %out.gep
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.floor.f32(float) #1
Modified: llvm/trunk/test/CodeGen/AMDGPU/clamp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/clamp.ll?rev=295908&r1=295907&r2=295908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/clamp.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/clamp.ll Wed Feb 22 17:53:37 2017
@@ -147,8 +147,7 @@ define amdgpu_kernel void @v_clamp_negab
; FIXME: Do f64 instructions support clamp?
; GCN-LABEL: {{^}}v_clamp_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -163,8 +162,7 @@ define amdgpu_kernel void @v_clamp_f64(d
; GCN-LABEL: {{^}}v_clamp_neg_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
@@ -180,8 +178,7 @@ define amdgpu_kernel void @v_clamp_neg_f
; GCN-LABEL: {{^}}v_clamp_negabs_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
-; GCN: v_max_f64
-; GCN: v_min_f64
+; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
More information about the llvm-commits
mailing list