[PATCH] D64920: AMDGPU: Use tablegen pattern for sendmsg intrinsics
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 06:33:50 PDT 2019
arsenm created this revision.
arsenm added reviewers: nhaehnle, rampitec, tstellar.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
arsenm added a parent revision: D64919: TableGen: Support physical register inputs > 255.
Since this now emits a direct copy to m0, SIFixSGPRCopies has to
handle a copy whose destination is a physical register.
https://reviews.llvm.org/D64920
Files:
include/llvm/IR/IntrinsicsAMDGPU.td
lib/Target/AMDGPU/SIFixSGPRCopies.cpp
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SOPInstructions.td
Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -1110,12 +1110,11 @@
let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
- [(AMDGPUsendmsg (i32 imm:$simm16))]
->;
+ [(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
- [(AMDGPUsendmsghalt (i32 imm:$simm16))]
->;
+ [(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
+
} // End Uses = [EXEC, M0]
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16"> {
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6686,15 +6686,6 @@
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
- case Intrinsic::amdgcn_s_sendmsg:
- case Intrinsic::amdgcn_s_sendmsghalt: {
- unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ?
- AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT;
- Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
- SDValue Glue = Chain.getValue(1);
- return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
- Op.getOperand(2), Glue);
- }
case Intrinsic::amdgcn_init_exec: {
return DAG.getNode(AMDGPUISD::INIT_EXEC, DL, MVT::Other, Chain,
Op.getOperand(2));
Index: lib/Target/AMDGPU/SIFixSGPRCopies.cpp
===================================================================
--- lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -618,13 +618,29 @@
case AMDGPU::COPY:
case AMDGPU::WQM:
case AMDGPU::WWM: {
- // If the destination register is a physical register there isn't really
- // much we can do to fix this.
- if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
- continue;
+ Register DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
+
+ if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ // If the destination register is a physical register there is little we
+ // can do to fix this in general; only a copy into M0 is handled below.
+ Register TmpReg
+ = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ // Some special instructions use M0 as an input. Some even only use
+ // the first lane. Insert a readfirstlane and hope for the best.
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
+ BuildMI(MBB, MI, MI.getDebugLoc(),
+ TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+ .add(MI.getOperand(1));
+ MI.getOperand(1).setReg(TmpReg);
+ }
+
+ continue;
+ }
+
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
unsigned SrcReg = MI.getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -199,9 +199,11 @@
// The first parameter is s_sendmsg immediate (i16),
// the second one is copied to m0
def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
- Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>, IntrInaccessibleMemOnly]>;
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
+ [ImmArg<0>, IntrNoMem, IntrHasSideEffects]>;
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D64920.210549.patch
Type: text/x-patch
Size: 4213 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190718/19075920/attachment.bin>
More information about the llvm-commits
mailing list