[llvm] f463792 - AMDGPU: Remove custom node for RSQ_LEGACY
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 17 16:50:46 PDT 2020
Author: Matt Arsenault
Date: 2020-04-17T19:50:36-04:00
New Revision: f4637925065d31c67433977656525c059e697678
URL: https://github.com/llvm/llvm-project/commit/f4637925065d31c67433977656525c059e697678
DIFF: https://github.com/llvm/llvm-project/commit/f4637925065d31c67433977656525c059e697678.diff
LOG: AMDGPU: Remove custom node for RSQ_LEGACY
Directly select from the intrinsic. This wasn't getting much value
from the custom node.
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/VOP1Instructions.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index b5f9685f5b7b..221d723c3e4a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -275,6 +275,7 @@ def int_amdgcn_rsq_legacy : GCCBuiltin<"__builtin_amdgcn_rsq_legacy">,
[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]
>;
+// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def int_amdgcn_rsq_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 51f4132b1096..014bb5a1b4ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3000,6 +3000,16 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
return simplifyI24(N, DCI);
+ case Intrinsic::amdgcn_fract:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_ldexp: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
+ SDValue Src = N->getOperand(1);
+ return Src.isUndef() ? Src : SDValue();
+ }
default:
return SDValue();
}
@@ -4258,7 +4268,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
- NODE_NAME_CASE(RSQ_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
@@ -4653,7 +4662,6 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RSQ_CLAMP: {
if (SNaN)
return true;
@@ -4697,6 +4705,17 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
}
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
+ case Intrinsic::amdgcn_rsq_clamp: {
+ if (SNaN)
+ return true;
+
+ // TODO: Need is known positive check.
+ return false;
+ }
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 7d0b17f7e816..d81b447d1a19 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -416,7 +416,6 @@ enum NodeType : unsigned {
RCP,
RSQ,
RCP_LEGACY,
- RSQ_LEGACY,
RCP_IFLAG,
FMUL_LEGACY,
RSQ_CLAMP,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index eae00f0867d3..b9ded3b196bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -121,8 +121,6 @@ def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
-// out = 1.0 / sqrt(a)
-def AMDGPUrsq_legacy_impl : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
@@ -385,9 +383,6 @@ def AMDGPUrcp : PatFrags<(ops node:$src), [(int_amdgcn_rcp node:$src),
def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rcp_legacy node:$src),
(AMDGPUrcp_legacy_impl node:$src)]>;
-def AMDGPUrsq_legacy : PatFrags<(ops node:$src), [(int_amdgcn_rsq_legacy node:$src),
- (AMDGPUrsq_legacy_impl node:$src)]>;
-
def AMDGPUrsq : PatFrags<(ops node:$src), [(int_amdgcn_rsq node:$src),
(AMDGPUrsq_impl node:$src)]>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a0260f5d57e7..1bdd2ed8111c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5842,8 +5842,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
-
- return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+ return SDValue();
case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
@@ -8777,7 +8776,6 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::TRIG_PREOP:
case AMDGPUISD::DIV_SCALE:
@@ -8882,6 +8880,11 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_fdot2:
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rsq:
+ case Intrinsic::amdgcn_rsq_clamp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_rsq_legacy:
return true;
default:
break;
@@ -10068,10 +10071,10 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
- case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
+ // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 95f0c0e02aec..d8578578dc35 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -325,7 +325,7 @@ let SubtargetPredicate = isGFX6GFX7 in {
defm V_RSQ_CLAMP_F32 :
VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 :
- VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+ VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
} // End SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteDouble] in {
More information about the llvm-commits
mailing list