[llvm] 68b102b - AMDGPU: Directly select 16-bank LDS case of llvm.amdgcn.interp.p1.f16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 08:36:22 PST 2020
Author: Matt Arsenault
Date: 2020-01-29T08:24:31-08:00
New Revision: 68b102b97ac340698e31ab5af0c4394a9789a49c
URL: https://github.com/llvm/llvm-project/commit/68b102b97ac340698e31ab5af0c4394a9789a49c
DIFF: https://github.com/llvm/llvm-project/commit/68b102b97ac340698e31ab5af0c4394a9789a49c.diff
LOG: AMDGPU: Directly select 16-bank LDS case of llvm.amdgcn.interp.p1.f16
Manually select this is as a tablegen workraound. Both SelectionDAG
and GlobalISel end up misplacing the copy to m0 when both instructions
in the output need it. Neither considers that both output instructions
depend on m0. I don't know of any other pattern we need to handle this
case, so it's less effort to just workaround this for now.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/VOP3Instructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 07f500b136e8..ae9221d9994e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -300,6 +300,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
void SelectINTRINSIC_WO_CHAIN(SDNode *N);
void SelectINTRINSIC_VOID(SDNode *N);
@@ -2339,6 +2340,64 @@ void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
+void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
+ if (Subtarget->getLDSBankCount() != 16) {
+ // This is a single instruction with a pattern.
+ SelectCode(N);
+ return;
+ }
+
+ SDLoc DL(N);
+
+ // This requires 2 instructions. It is possible to write a pattern to support
+ // this, but the generated isel emitter doesn't correctly deal with multiple
+ // output instructions using the same physical register input. The copy to m0
+ // is incorrectly placed before the second instruction.
+ //
+ // TODO: Match source modifiers.
+ //
+ // def : Pat <
+ // (int_amdgcn_interp_p1_f16
+ // (VOP3Mods f32:$src0, i32:$src0_modifiers),
+ // (i32 timm:$attrchan), (i32 timm:$attr),
+ // (i1 timm:$high), M0),
+ // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
+ // timm:$attrchan, 0,
+ // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
+ // let Predicates = [has16BankLDS];
+ // }
+
+ // 16 bank LDS
+ SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
+ N->getOperand(5), SDValue());
+
+ SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
+
+ SDNode *InterpMov =
+ CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
+ CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
+ N->getOperand(3), // Attr
+ N->getOperand(2), // Attrchan
+ ToM0.getValue(1) // In glue
+ });
+
+ SDNode *InterpP1LV =
+ CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
+ N->getOperand(1), // Src0
+ N->getOperand(3), // Attr
+ N->getOperand(2), // Attrchan
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
+ SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
+ N->getOperand(4), // high
+ CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
+ CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
+ SDValue(InterpMov, 1)
+ });
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
+}
+
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
@@ -2367,6 +2426,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_wwm:
Opcode = AMDGPU::WWM;
break;
+ case Intrinsic::amdgcn_interp_p1_f16:
+ SelectInterpP1F16(N);
+ return;
default:
SelectCode(N);
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9232b65cd32d..927e17390118 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4323,9 +4323,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
- NODE_NAME_CASE(INTERP_P1LL_F16)
- NODE_NAME_CASE(INTERP_P1LV_F16)
- NODE_NAME_CASE(INTERP_P2_F16)
NODE_NAME_CASE(LOAD_D16_HI)
NODE_NAME_CASE(LOAD_D16_LO)
NODE_NAME_CASE(LOAD_D16_HI_I8)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 087983e6cf18..7b269e8b21e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -474,9 +474,6 @@ enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
- INTERP_P1LL_F16,
- INTERP_P1LV_F16,
- INTERP_P2_F16,
PC_ADD_REL_OFFSET,
LDS,
KILL,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 4d7c6b6b57ee..56eb2e9891bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -329,18 +329,6 @@ def AMDGPUfdot2 : SDNode<"AMDGPUISD::FDOT2",
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
-def AMDGPUinterp_p1ll_f16 : SDNode<"AMDGPUISD::INTERP_P1LL_F16",
- SDTypeProfile<1, 7, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p1lv_f16 : SDNode<"AMDGPUISD::INTERP_P1LV_F16",
- SDTypeProfile<1, 9, [SDTCisFP<0>]>,
- [SDNPInGlue, SDNPOutGlue]>;
-
-def AMDGPUinterp_p2_f16 : SDNode<"AMDGPUISD::INTERP_P2_F16",
- SDTypeProfile<1, 8, [SDTCisFP<0>]>,
- [SDNPInGlue]>;
-
def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index dd4a9960f6ee..48f2bad68911 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5872,38 +5872,6 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
- case Intrinsic::amdgcn_interp_p1_f16: {
- if (getSubtarget()->getLDSBankCount() == 16) {
- // 16 bank LDS
- SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
- Op.getOperand(5), SDValue());
-
- // FIXME: This implicitly will insert a second CopyToReg to M0.
- SDValue S = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
- DAG.getTargetConstant(Intrinsic::amdgcn_interp_mov, DL, MVT::i32),
- DAG.getTargetConstant(2, DL, MVT::i32), // P0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- Op.getOperand(5)); // m0
-
- SDValue Ops[] = {
- Op.getOperand(1), // Src0
- Op.getOperand(2), // Attrchan
- Op.getOperand(3), // Attr
- DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
- S, // Src2 - holds two f16 values selected by high
- DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
- Op.getOperand(4), // high
- DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
- DAG.getTargetConstant(0, DL, MVT::i32), // $omod
- ToM0.getValue(1)
- };
- return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
- }
-
- return SDValue();
- }
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 0141f4fa0735..176d0e06438c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -491,15 +491,7 @@ def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32
}
-def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
- [(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
- (i32 timm:$attr),
- (i32 timm:$src0_modifiers),
- (f32 VRegSrc_32:$src2),
- (i32 timm:$src2_modifiers),
- (i1 timm:$high),
- (i1 timm:$clamp),
- (i32 timm:$omod)))]>;
+def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
More information about the llvm-commits
mailing list