[llvm] r288695 - AMDGPU: Refactor exp instructions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 12:23:10 PST 2016
Author: arsenm
Date: Mon Dec 5 14:23:10 2016
New Revision: 288695
URL: http://llvm.org/viewvc/llvm-project?rev=288695&view=rev
Log:
AMDGPU: Refactor exp instructions
Structure the definitions a bit more like the other classes.
The main change here is to split EXP with the done bit set
to a separate opcode, so we can set mayLoad = 1 so that it won't
be reordered before the other exp stores, since this has the special
constraint that if the done bit is set then this should be the last
exp in she shader.
Previously all exp instructions were inferred to have unmodeled
side effects.
Added:
llvm/trunk/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td
llvm/trunk/test/CodeGen/AMDGPU/ret.ll
llvm/trunk/test/CodeGen/MIR/AMDGPU/movrels-bug.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Mon Dec 5 14:23:10 2016
@@ -2999,6 +2999,8 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(EXPORT_DONE)
+ NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Mon Dec 5 14:23:10 2016
@@ -280,7 +280,9 @@ enum NodeType : unsigned {
MUL_LOHI_I24,
MUL_LOHI_U24,
TEXTURE_FETCH,
- EXPORT,
+ EXPORT, // exp on SI+
+ EXPORT_DONE, // exp on SI+ with done bit set
+ R600_EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.td Mon Dec 5 14:23:10 2016
@@ -265,9 +265,35 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD:
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
[SDNPInGlue]>;
+
def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
[SDNPHasChain, SDNPSideEffect]>;
+// SI+ export
+def AMDGPUExportOp : SDTypeProfile<0, 8, [
+ SDTCisInt<0>, // i8 en
+ SDTCisInt<1>, // i1 vm
+ // skip done
+ SDTCisInt<2>, // i8 tgt
+ SDTCisSameAs<3, 1>, // i1 compr
+ SDTCisFP<4>, // f32 src0
+ SDTCisSameAs<5, 4>, // f32 src1
+ SDTCisSameAs<6, 4>, // f32 src2
+ SDTCisSameAs<7, 4> // f32 src3
+]>;
+
+def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
+ [SDNPHasChain, SDNPMayStore]>;
+
+def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+
+def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
+ [SDNPHasChain, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Mon Dec 5 14:23:10 2016
@@ -443,7 +443,7 @@ SDValue R600TargetLowering::LowerOperati
DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
DAG.getConstant(3, DL, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
+ return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -1882,7 +1882,7 @@ SDValue R600TargetLowering::PerformDAGCo
return SDValue();
}
- case AMDGPUISD::EXPORT: {
+ case AMDGPUISD::R600_EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
break;
@@ -1898,7 +1898,7 @@ SDValue R600TargetLowering::PerformDAGCo
N->getOperand(7) // SWZ_W
};
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
- return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
+ return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
SDValue Arg = N->getOperand(1);
Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Mon Dec 5 14:23:10 2016
@@ -436,11 +436,6 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "IN
// Export Instructions
//===----------------------------------------------------------------------===//
-def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
-
-def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
- [SDNPHasChain, SDNPSideEffect]>;
-
class ExportWord0 {
field bits<32> Word0;
@@ -486,7 +481,7 @@ class ExportBufWord1 {
}
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
(i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
(ExportInst R600_Reg128:$src, imm:$type, imm:$base,
imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Mon Dec 5 14:23:10 2016
@@ -15,7 +15,7 @@
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
-enum {
+enum : uint32_t {
SALU = 1 << 3,
VALU = 1 << 4,
@@ -38,15 +38,16 @@ enum {
DS = 1 << 19,
MIMG = 1 << 20,
FLAT = 1 << 21,
- WQM = 1 << 22,
- VGPRSpill = 1 << 23,
- SGPRSpill = 1 << 24,
- VOPAsmPrefer32Bit = 1 << 25,
- Gather4 = 1 << 26,
- DisableWQM = 1 << 27,
- SOPK_ZEXT = 1 << 28,
- SCALAR_STORE = 1 << 29,
- FIXED_SIZE = 1 << 30
+ EXP = 1 << 22,
+ WQM = 1 << 23,
+ VGPRSpill = 1 << 24,
+ SGPRSpill = 1 << 25,
+ VOPAsmPrefer32Bit = 1 << 26,
+ Gather4 = 1 << 27,
+ DisableWQM = 1 << 28,
+ SOPK_ZEXT = 1 << 29,
+ SCALAR_STORE = 1 << 30,
+ FIXED_SIZE = 1u << 31
};
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Mon Dec 5 14:23:10 2016
@@ -2683,6 +2683,29 @@ SDValue SITargetLowering::LowerINTRINSIC
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}
+ case AMDGPUIntrinsic::SI_export: {
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3));
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4));
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5));
+ const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6));
+
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8),
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1),
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8),
+ DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1),
+ Op.getOperand(7), // src0
+ Op.getOperand(8), // src1
+ Op.getOperand(9), // src2
+ Op.getOperand(10) // src3
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
+ }
default:
return SDValue();
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertSkips.cpp Mon Dec 5 14:23:10 2016
@@ -159,16 +159,15 @@ bool SIInsertSkips::skipIfDead(MachineIn
MachineBasicBlock::iterator Insert = SkipBB->begin();
// Exec mask is zero: Export to NULL target...
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
.addReg(AMDGPU::VGPR0, RegState::Undef)
.addReg(AMDGPU::VGPR0, RegState::Undef)
.addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef);
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
// ... and terminate wavefront.
BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Mon Dec 5 14:23:10 2016
@@ -195,8 +195,7 @@ Counters SIInsertWaits::getHwCounts(Mach
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
// Only consider stores or EXP for EXP_CNT
- Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
- (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore();
// LGKM may uses larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
@@ -238,9 +237,10 @@ bool SIInsertWaits::isOpRelevant(Machine
if (Op.isDef())
return true;
- // For exports all registers are relevant
+ // For exports all registers are relevant.
+ // TODO: Skip undef/disabled registers.
MachineInstr &MI = *Op.getParent();
- if (MI.getOpcode() == AMDGPU::EXP)
+ if (TII->isEXP(MI))
return true;
// For stores the stored value is also relevant
@@ -340,7 +340,7 @@ void SIInsertWaits::pushInstruction(Mach
// Remember which export instructions we have seen
if (Increment.Named.EXP) {
- ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
+ ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Mon Dec 5 14:23:10 2016
@@ -41,6 +41,7 @@ class InstSI <dag outs, dag ins, string
field bit DS = 0;
field bit MIMG = 0;
field bit FLAT = 0;
+ field bit EXP = 0;
// Whether WQM _must_ be enabled for this instruction.
field bit WQM = 0;
@@ -96,15 +97,16 @@ class InstSI <dag outs, dag ins, string
let TSFlags{19} = DS;
let TSFlags{20} = MIMG;
let TSFlags{21} = FLAT;
- let TSFlags{22} = WQM;
- let TSFlags{23} = VGPRSpill;
- let TSFlags{24} = SGPRSpill;
- let TSFlags{25} = VOPAsmPrefer32Bit;
- let TSFlags{26} = Gather4;
- let TSFlags{27} = DisableWQM;
- let TSFlags{28} = SOPKZext;
- let TSFlags{29} = ScalarStore;
- let TSFlags{30} = FixedSize;
+ let TSFlags{22} = EXP;
+ let TSFlags{23} = WQM;
+ let TSFlags{24} = VGPRSpill;
+ let TSFlags{25} = SGPRSpill;
+ let TSFlags{26} = VOPAsmPrefer32Bit;
+ let TSFlags{27} = Gather4;
+ let TSFlags{28} = DisableWQM;
+ let TSFlags{29} = SOPKZext;
+ let TSFlags{30} = ScalarStore;
+ let TSFlags{31} = FixedSize;
let SchedRW = [Write32Bit];
@@ -232,6 +234,17 @@ class VINTRPCommon <dag outs, dag ins, s
let hasSideEffects = 0;
}
+class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+ let EXP = 1;
+ let EXP_CNT = 1;
+ let mayLoad = 0; // Set to 1 if done bit is set.
+ let mayStore = 1;
+ let UseNamedOperandTable = 1;
+ let Uses = [EXEC];
+ let SchedRW = [WriteExport];
+}
+
} // End Uses = [EXEC]
class MIMG <dag outs, dag ins, string asm, list<dag> pattern> :
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Mon Dec 5 14:23:10 2016
@@ -372,6 +372,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ static bool isEXP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::EXP;
+ }
+
+ bool isEXP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::EXP;
+ }
+
static bool isWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Mon Dec 5 14:23:10 2016
@@ -518,32 +518,39 @@ class SIMCInstr <string pseudo, int subt
// EXP classes
//===----------------------------------------------------------------------===//
-class EXPCommon : InstSI<
+class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
(outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3),
- "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
-
- let EXP_CNT = 1;
- let Uses = [EXEC];
- let SchedRW = [WriteExport];
-}
-
-multiclass EXP_m {
-
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ;
- }
-
- def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe {
- let DecoderNamespace="SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi {
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
+ (ins i8imm:$tgt, VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3,
+ i1imm:$vm, i1imm:$compr, i8imm:$en),
+ "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3",
+ [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr),
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3)]
+>;
+
+// Split EXP instruction into EXP and EXP_DONE so we can set
+// mayLoad for done=1.
+multiclass EXP_m<bit done, SDPatternOperator node> {
+ let mayLoad = done in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
+ def "" : EXP_Helper<done, node>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
+ }
+
+ let done = done in {
+ def _si : EXP_Helper<done>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
+ EXPe {
+ let DecoderNamespace = "SICI";
+ let DisableDecoder = DisableSIDecoder;
+ }
+
+ def _vi : EXP_Helper<done>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
+ EXPe_vi {
+ let DecoderNamespace = "VI";
+ let DisableDecoder = DisableVIDecoder;
+ }
+ }
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Dec 5 14:23:10 2016
@@ -37,7 +37,8 @@ let SubtargetPredicate = isGCN in {
// EXP Instructions
//===----------------------------------------------------------------------===//
-defm EXP : EXP_m;
+defm EXP : EXP_m<0, AMDGPUexport>;
+defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -388,13 +389,6 @@ def : Pat <
(SI_KILL (i32 0xbf800000))
>;
-def : Pat <
- (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
- f32:$src0, f32:$src1, f32:$src2, f32:$src3),
- (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
- $src0, $src1, $src2, $src3)
->;
-
//===----------------------------------------------------------------------===//
// VOP1 Patterns
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIIntrinsics.td Mon Dec 5 14:23:10 2016
@@ -15,7 +15,20 @@
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+
+ def int_SI_export : Intrinsic <[],
+ [llvm_i32_ty, // en
+ llvm_i32_ty, // vm (FIXME: should be i1)
+ llvm_i32_ty, // done (FIXME: should be i1)
+ llvm_i32_ty, // tgt
+ llvm_i32_ty, // compr (FIXME: should be i1)
+ llvm_float_ty, // src0
+ llvm_float_ty, // src1
+ llvm_float_ty, // src2
+ llvm_float_ty], // src3
+ []
+ >;
+
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
Modified: llvm/trunk/test/CodeGen/AMDGPU/ret.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ret.ll?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ret.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ret.ll Mon Dec 5 14:23:10 2016
@@ -6,7 +6,7 @@ declare void @llvm.SI.export(i32, i32, i
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp 15, 0, -1, 1, -1, v1, v1, v1, v1
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
@@ -19,7 +19,7 @@ define amdgpu_vs {float, float} @vgpr([9
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
-; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
+; GCN: exp 15, 0, -1, 1, -1, v4, v4, v4, v4
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
@@ -209,7 +209,7 @@ define amdgpu_vs {i32, i32, i32, i32} @s
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp 15, 0, -1, 1, -1, v1, v1, v1, v1
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
@@ -231,7 +231,7 @@ define amdgpu_vs {float, i32, float, i32
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
-; GCN: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
+; GCN: exp 15, 0, -1, 1, -1, v3, v3, v3, v3
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
Added: llvm/trunk/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir?rev=288695&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir (added)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir Mon Dec 5 14:23:10 2016
@@ -0,0 +1,63 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
+--- |
+ define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+ %a = load volatile float, float addrspace(1)* undef
+ %b = load volatile float, float addrspace(1)* undef
+ %c = load volatile float, float addrspace(1)* undef
+ %d = load volatile float, float addrspace(1)* undef
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
+ ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
+ }
+
+ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+ attributes #0 = { readnone }
+ attributes #1 = { nounwind }
+
+...
+---
+
+# CHECK-LABEL: name: exp_done_waitcnt{{$}}
+# CHECK: EXP_DONE
+# CHECK-NEXT: S_WAITCNT 3855
+# CHECK: %vgpr0 = V_MOV_B32
+# CHECK: %vgpr1 = V_MOV_B32
+# CHECK: %vgpr2 = V_MOV_B32
+# CHECK: %vgpr3 = V_MOV_B32
+name: exp_done_waitcnt
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.2):
+ %sgpr3 = S_MOV_B32 61440
+ %sgpr2 = S_MOV_B32 -1
+ %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr2 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ EXP_DONE 0, killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3, -1, -1, 15, implicit %exec
+ %vgpr0 = V_MOV_B32_e32 1056964608, implicit %exec
+ %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec
+ %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec
+ SI_RETURN killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3
+
+...
Modified: llvm/trunk/test/CodeGen/MIR/AMDGPU/movrels-bug.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/MIR/AMDGPU/movrels-bug.mir?rev=288695&r1=288694&r2=288695&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/MIR/AMDGPU/movrels-bug.mir (original)
+++ llvm/trunk/test/CodeGen/MIR/AMDGPU/movrels-bug.mir Mon Dec 5 14:23:10 2016
@@ -25,7 +25,7 @@ body: |
%m0 = S_MOV_B32 undef %sgpr0
%vgpr1 = V_MOVRELS_B32_e32 undef %vgpr1, implicit %m0, implicit %exec, implicit killed %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
%vgpr4 = V_MAC_F32_e32 undef %vgpr0, undef %vgpr0, undef %vgpr4, implicit %exec
- EXP 15, 12, 0, 1, 0, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, implicit %exec
+ EXP_DONE 15, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, 0, 0, 12, implicit %exec
S_ENDPGM
...
More information about the llvm-commits
mailing list