[llvm] f903e3e - [AMDGPU] Reset kill flags for multiple uses of SDWAInst Ops

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 09:47:42 PDT 2024


Author: Jeffrey Byrnes
Date: 2024-07-01T09:14:02-07:00
New Revision: f903e3ec77d6de310d8bf7453b1106adb2d2becd

URL: https://github.com/llvm/llvm-project/commit/f903e3ec77d6de310d8bf7453b1106adb2d2becd
DIFF: https://github.com/llvm/llvm-project/commit/f903e3ec77d6de310d8bf7453b1106adb2d2becd.diff

LOG: [AMDGPU] Reset kill flags for multiple uses of SDWAInst Ops

Change-Id: I8b56d86a55c397623567945a87ad2f55749680bc

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
    llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index f47731bf6aac3..d428864c9dd59 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1184,8 +1184,15 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     if (PotentialMatches.count(Operand->getParentInst()) == 0)
       Converted |= Operand->convertToSDWA(*SDWAInst, TII);
   }
+
   if (Converted) {
     ConvertedInstructions.push_back(SDWAInst);
+    for (MachineOperand &MO : SDWAInst->uses()) {
+      if (!MO.isReg())
+        continue;
+
+      MRI->clearKillFlags(MO.getReg());
+    }
   } else {
     SDWAInst->eraseFromParent();
     return false;

diff  --git a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
index 4c61e6803febf..4ca39ecc7a0ae 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-preserve.mir
@@ -36,7 +36,7 @@ body:             |
     ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
     ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
     ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
-    ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
+    ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
     ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
     ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
     ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
@@ -185,7 +185,7 @@ body:             |
   ; SDWA-NEXT:   [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
   ; SDWA-NEXT: {{  $}}
   ; SDWA-NEXT: bb.2:
-  ; SDWA-NEXT:   [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit killed [[V_MUL_F32_sdwa]](tied-def 0)
+  ; SDWA-NEXT:   [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
   ; SDWA-NEXT:   FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
   ; SDWA-NEXT:   $sgpr30_sgpr31 = COPY [[COPY]]
   ; SDWA-NEXT:   S_SETPC_B64_return $sgpr30_sgpr31
@@ -217,3 +217,36 @@ body:             |
     $sgpr30_sgpr31 = COPY %2
     S_SETPC_B64_return $sgpr30_sgpr31
 ...
+
+# Should not add kill flag to reused ops in SDWAInst
+
+---
+name: multiuse_kill
+tracksRegLiveness: true
+
+body:             |
+  bb.0:
+    ; SDWA-LABEL: name: multiuse_kill
+    ; SDWA: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SDWA-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SDWA-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; SDWA-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF]], implicit $exec
+    ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
+    ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_]], [[DEF1]], implicit $exec
+    ; SDWA-NEXT: [[V_OR_B32_sdwa:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_]], 0, 6, 0, 4, 6, implicit $exec
+    ; SDWA-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF2]], implicit $exec
+    ; SDWA-NEXT: [[V_OR_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_1]], 0, 6, 0, 4, 6, implicit $exec
+    ; SDWA-NEXT: S_ENDPGM 0
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:vgpr_32 = IMPLICIT_DEF
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_LSHLREV_B32_e64 16, killed %0, implicit $exec
+    %4:sreg_32 = S_MOV_B32 65535
+    %5:vgpr_32 = V_AND_B32_e64 killed %4, killed %1, implicit $exec
+    %6:vgpr_32 = V_OR_B32_e64 %5, killed %3, implicit $exec
+    %7:vgpr_32 = V_LSHLREV_B32_e64 16, killed %2, implicit $exec
+    %8:vgpr_32 = V_OR_B32_e64 %5, killed %7, implicit $exec
+
+    S_ENDPGM 0
+
+...


        


More information about the llvm-commits mailing list