[llvm] 0e8f505 - [AMDGPU] Mark V_SET_INACTIVE as defining SCC
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 16:47:15 PST 2021
Author: Carl Ritson
Date: 2021-01-29T09:46:41+09:00
New Revision: 0e8f50595e22eb3ca8762c16d07e2fba5757cb71
URL: https://github.com/llvm/llvm-project/commit/0e8f50595e22eb3ca8762c16d07e2fba5757cb71
DIFF: https://github.com/llvm/llvm-project/commit/0e8f50595e22eb3ca8762c16d07e2fba5757cb71.diff
LOG: [AMDGPU] Mark V_SET_INACTIVE as defining SCC
V_SET_INACTIVE is implemented with S_NOT which clobbers SCC.
Mark sure it is marked appropriately.
Reviewed By: piotr
Differential Revision: https://reviews.llvm.org/D95509
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
llvm/test/CodeGen/AMDGPU/wqm.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index dfd0075bf03a..fa6464c172d7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1590,6 +1590,7 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
}
bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
@@ -1675,8 +1676,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_SET_INACTIVE_B32: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+ FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
.add(MI.getOperand(2));
BuildMI(MBB, MI, DL, get(NotOpc), Exec)
@@ -1687,8 +1688,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_SET_INACTIVE_B64: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+ FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
MI.getOperand(0).getReg())
.add(MI.getOperand(2));
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7c1cbd67c993..ecb875debefd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -145,6 +145,7 @@ def EXIT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
+let Defs = [SCC] in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
@@ -156,6 +157,7 @@ def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
[(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
let Constraints = "$src = $vdst";
}
+} // End Defs = [SCC]
let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
def V_ADD_U64_PSEUDO : VPseudoInstSI <
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
index 2e3f589bfb95..4f5ace0a40d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -45,17 +45,17 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_buffer_load_dword s1, s[8:11], 0x0
-; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: s_load_dword s6, s[0:1], 0x2c
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lg_u32 s1, 56
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 42
; GCN-NEXT: s_not_b64 exec, exec
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_lg_u32 s0, 56
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: ; %bb.1: ; %Flow
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index 867908c1cdfd..21fa8d87112d 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -99,7 +99,7 @@ body: |
%8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
%16:vgpr_32 = COPY %8.sub1
%11:vgpr_32 = COPY %16
- %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec
+ %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
%13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec
More information about the llvm-commits
mailing list