[llvm] eef92f2 - AMDGPU: Remove custom node for exports
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 15 15:33:23 PST 2020
Author: Matt Arsenault
Date: 2020-01-15T18:33:15-05:00
New Revision: eef92f25ccf186935008c98f8147ad2ee178dcbb
URL: https://github.com/llvm/llvm-project/commit/eef92f25ccf186935008c98f8147ad2ee178dcbb
DIFF: https://github.com/llvm/llvm-project/commit/eef92f25ccf186935008c98f8147ad2ee178dcbb.diff
LOG: AMDGPU: Remove custom node for exports
I'm mildly worried about potentially reordering exp/exp_done with
IntrWriteMem on the intrinsic.
Requires hacking out the illegal type on SI, so manually select that
case during lowering.
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 07ca3a9229d6..385ea6d8c9c5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1156,7 +1156,7 @@ def int_amdgcn_exp : Intrinsic <[], [
llvm_i1_ty, // done
llvm_i1_ty // vm
],
- [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
+ [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly]
>;
// exp with compr bit set.
@@ -1167,7 +1167,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
LLVMMatchType<0>, // src1
llvm_i1_ty, // done
llvm_i1_ty], // vm
- [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
+ [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly]
>;
def int_amdgcn_buffer_wbinvl1_sc :
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 23cc9404532d..c985ba67b279 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4298,8 +4298,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(PERM)
NODE_NAME_CASE(TEXTURE_FETCH)
- NODE_NAME_CASE(EXPORT)
- NODE_NAME_CASE(EXPORT_DONE)
NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a90b7f5653dc..087983e6cf18 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -433,8 +433,6 @@ enum NodeType : unsigned {
MUL_LOHI_U24,
PERM,
TEXTURE_FETCH,
- EXPORT, // exp on SI+
- EXPORT_DONE, // exp on SI+ with done bit set
R600_EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 50c451be4b86..4d7c6b6b57ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -358,12 +358,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
]>;
-def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
- [SDNPHasChain, SDNPMayStore]>;
-
-def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
- [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 52bee393fae8..1487920aac21 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6782,52 +6782,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
- case Intrinsic::amdgcn_exp: {
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
-
- const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
- Op.getOperand(4), // src0
- Op.getOperand(5), // src1
- Op.getOperand(6), // src2
- Op.getOperand(7), // src3
- DAG.getTargetConstant(0, DL, MVT::i1), // compr
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
- };
-
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
- }
case Intrinsic::amdgcn_exp_compr: {
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
+ // Hack around illegal type on SI by directly selecting it.
+ if (isTypeLegal(Src0.getValueType()))
+ return SDValue();
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
SDValue Undef = DAG.getUNDEF(MVT::f32);
const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
- DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
- DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
+ Op.getOperand(2), // tgt
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
Undef, // src2
Undef, // src3
+ Op.getOperand(7), // vm
DAG.getTargetConstant(1, DL, MVT::i1), // compr
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ Op.getOperand(3), // en
+ Op.getOperand(0) // Chain
};
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
+ unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
+ return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 85e8d0582dcd..02004c2739a3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1101,7 +1101,7 @@ def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
-def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
+def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
}
@@ -1380,24 +1380,21 @@ class SIMCInstr <string pseudo, int subtarget> {
// EXP classes
//===----------------------------------------------------------------------===//
-class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
+class EXP_Helper<bit done> : EXPCommon<
(outs),
(ins exp_tgt:$tgt,
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
- exp_vm:$vm, exp_compr:$compr, i8imm:$en),
- "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
- [(node (i8 timm:$tgt), (i8 timm:$en),
- f32:$src0, f32:$src1, f32:$src2, f32:$src3,
- (i1 timm:$compr), (i1 timm:$vm))]> {
+ exp_vm:$vm, exp_compr:$compr, i32imm:$en),
+ "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
let AsmMatchConverter = "cvtExp";
}
// Split EXP instruction into EXP and EXP_DONE so we can set
// mayLoad for done=1.
-multiclass EXP_m<bit done, SDPatternOperator node> {
+multiclass EXP_m<bit done> {
let mayLoad = done, DisableWQM = 1 in {
let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXP_Helper<done, node>,
+ def "" : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d84720f820ee..7660cbd0962d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -24,8 +24,41 @@ include "BUFInstructions.td"
// EXP Instructions
//===----------------------------------------------------------------------===//
-defm EXP : EXP_m<0, AMDGPUexport>;
-defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
+defm EXP : EXP_m<0>;
+defm EXP_DONE : EXP_m<1>;
+
+// FIXME: GlobalISel successfully imports this pattern, but fails to
+// select because the i1 done_val does a type check on done_val, which
+// only works on register operands.
+class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ (vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
+>;
+
+class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
+ (int_amdgcn_exp_compr timm:$tgt, timm:$en,
+ (vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
+ done_val, timm:$vm),
+ (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
+ (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
+>;
+
+// FIXME: The generated DAG matcher seems to have strange behavior
+// with a 1-bit literal to match, so use a -1 for checking a true
+// 1-bit value.
+def : ExpPattern<i32, EXP, 0>;
+def : ExpPattern<i32, EXP_DONE, -1>;
+def : ExpPattern<f32, EXP, 0>;
+def : ExpPattern<f32, EXP_DONE, -1>;
+
+def : ExpComprPattern<v2i16, EXP, 0>;
+def : ExpComprPattern<v2i16, EXP_DONE, -1>;
+def : ExpComprPattern<v2f16, EXP, 0>;
+def : ExpComprPattern<v2f16, EXP_DONE, -1>;
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -1782,15 +1815,6 @@ def : GCNPat <
SRCMODS.NONE, $src2, $clamp, $omod)
>;
-// Allow integer inputs
-class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
- (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
- (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
->;
-
-def : ExpPattern<AMDGPUexport, i32, EXP>;
-def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
-
// COPY is workaround tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <
More information about the llvm-commits
mailing list