[llvm] 1f0aadf - [AMDGPU] Fix kill flag on overlapping sgpr copy

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 18 05:36:22 PST 2022


Author: Sebastian Neubauer
Date: 2022-02-18T14:36:00+01:00
New Revision: 1f0aadfa62a56ca5a71bec0911538fe360bfc28d

URL: https://github.com/llvm/llvm-project/commit/1f0aadfa62a56ca5a71bec0911538fe360bfc28d
DIFF: https://github.com/llvm/llvm-project/commit/1f0aadfa62a56ca5a71bec0911538fe360bfc28d.diff

LOG: [AMDGPU] Fix kill flag on overlapping sgpr copy

As with VGPR copies, the source register of an SGPR copy must not be
marked as killed if it overlaps with the destination register. Otherwise,
the kill of the source register would also count as a kill of the
overlapping part of the destination register.

Differential Revision: https://reviews.llvm.org/D120042

Added: 
    llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9899c36352b84..7e5c9e990d4be 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -930,7 +930,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
       return;
     }
-    expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
+    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
+    expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC,
+                   Forward);
     return;
   }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
new file mode 100644
index 0000000000000..b97a9237a0b97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
+
+# Don't set a kill flag for the super register on the last instruction
+# of an overlapping copy. Doing so would kill part of the values that
+# were just written to the destination register.
+
+---
+name: overlapping_copy_kill_undef_reg_after_copy
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+
+    ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy
+    ; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+    ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+    renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+
+...
+
+---
+name: nonoverlapping_copy_kill
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+
+    ; CHECK-LABEL: name: nonoverlapping_copy_kill
+    ; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
+    ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
+    ; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
+    ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+    ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+    renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
+    renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+
+...


        


More information about the llvm-commits mailing list