[PATCH] D95509: [AMDGPU] Mark V_SET_INACTIVE as defining SCC
Carl Ritson via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 27 19:43:19 PST 2021
critson updated this revision to Diff 319742.
critson added a comment.
- Rebase onto pre-committed tests.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D95509/new/
https://reviews.llvm.org/D95509
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
llvm/test/CodeGen/AMDGPU/wqm.mir
Index: llvm/test/CodeGen/AMDGPU/wqm.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/wqm.mir
+++ llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -99,7 +99,7 @@
%8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
%16:vgpr_32 = COPY %8.sub1
%11:vgpr_32 = COPY %16
- %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec
+ %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
%14:vgpr_32 = COPY %7
%13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -45,17 +45,17 @@
define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN: ; %bb.0:
-; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x34
; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_buffer_load_dword s1, s[8:11], 0x0
-; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: s_load_dword s6, s[0:1], 0x2c
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x34
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_cmp_lg_u32 s1, 56
+; GCN-NEXT: v_mov_b32_e32 v0, s6
+; GCN-NEXT: s_buffer_load_dword s0, s[0:3], 0x0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 42
; GCN-NEXT: s_not_b64 exec, exec
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_cmp_lg_u32 s0, 56
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: ; %bb.1: ; %Flow
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -145,6 +145,7 @@
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
+let Defs = [SCC] in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VGPR_32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
@@ -156,6 +157,7 @@
[(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
let Constraints = "$src = $vdst";
}
+} // End Defs = [SCC]
let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
def V_ADD_U64_PSEUDO : VPseudoInstSI <
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1590,6 +1590,7 @@
}
bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
@@ -1675,8 +1676,8 @@
case AMDGPU::V_SET_INACTIVE_B32: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+ FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
.add(MI.getOperand(2));
BuildMI(MBB, MI, DL, get(NotOpc), Exec)
@@ -1687,8 +1688,8 @@
case AMDGPU::V_SET_INACTIVE_B64: {
unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+ FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
MI.getOperand(0).getReg())
.add(MI.getOperand(2));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D95509.319742.patch
Type: text/x-patch
Size: 4365 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210128/a46ae954/attachment.bin>
More information about the llvm-commits mailing list