[llvm] f903e3e - [AMDGPU] Reset kill flags for multiple uses of SDWAInst Ops
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 09:47:42 PDT 2024
Author: Jeffrey Byrnes
Date: 2024-07-01T09:14:02-07:00
New Revision: f903e3ec77d6de310d8bf7453b1106adb2d2becd
URL: https://github.com/llvm/llvm-project/commit/f903e3ec77d6de310d8bf7453b1106adb2d2becd
DIFF: https://github.com/llvm/llvm-project/commit/f903e3ec77d6de310d8bf7453b1106adb2d2becd.diff
LOG: [AMDGPU] Reset kill flags for multiple uses of SDWAInst Ops
Change-Id: I8b56d86a55c397623567945a87ad2f55749680bc
Added:
Modified:
llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index f47731bf6aac3..d428864c9dd59 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1184,8 +1184,15 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
if (PotentialMatches.count(Operand->getParentInst()) == 0)
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}
+
if (Converted) {
ConvertedInstructions.push_back(SDWAInst);
+ for (MachineOperand &MO : SDWAInst->uses()) {
+ if (!MO.isReg())
+ continue;
+
+ MRI->clearKillFlags(MO.getReg());
+ }
} else {
SDWAInst->eraseFromParent();
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index 4c61e6803febf..4ca39ecc7a0ae 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -36,7 +36,7 @@ body: |
; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
- ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
+ ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
@@ -185,7 +185,7 @@ body: |
; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
; SDWA-NEXT: {{ $}}
; SDWA-NEXT: bb.2:
- ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
+ ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
@@ -217,3 +217,36 @@ body: |
$sgpr30_sgpr31 = COPY %2
S_SETPC_B64_return $sgpr30_sgpr31
...
+
+# Should not add kill flag to reused ops in SDWAInst
+
+---
+name: multiuse_kill
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ ; SDWA-LABEL: name: multiuse_kill
+ ; SDWA: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SDWA-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SDWA-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SDWA-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF]], implicit $exec
+ ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+ ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_]], [[DEF1]], implicit $exec
+ ; SDWA-NEXT: [[V_OR_B32_sdwa:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_]], 0, 6, 0, 4, 6, implicit $exec
+ ; SDWA-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF2]], implicit $exec
+ ; SDWA-NEXT: [[V_OR_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_1]], 0, 6, 0, 4, 6, implicit $exec
+ ; SDWA-NEXT: S_ENDPGM 0
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = V_LSHLREV_B32_e64 16, killed %0, implicit $exec
+ %4:sreg_32 = S_MOV_B32 65535
+ %5:vgpr_32 = V_AND_B32_e64 killed %4, killed %1, implicit $exec
+ %6:vgpr_32 = V_OR_B32_e64 %5, killed %3, implicit $exec
+ %7:vgpr_32 = V_LSHLREV_B32_e64 16, killed %2, implicit $exec
+ %8:vgpr_32 = V_OR_B32_e64 %5, killed %7, implicit $exec
+
+ S_ENDPGM 0
+
+...
More information about the llvm-commits mailing list