[llvm] 1f0aadf - [AMDGPU] Fix kill flag on overlapping sgpr copy
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 18 05:36:22 PST 2022
Author: Sebastian Neubauer
Date: 2022-02-18T14:36:00+01:00
New Revision: 1f0aadfa62a56ca5a71bec0911538fe360bfc28d
URL: https://github.com/llvm/llvm-project/commit/1f0aadfa62a56ca5a71bec0911538fe360bfc28d
DIFF: https://github.com/llvm/llvm-project/commit/1f0aadfa62a56ca5a71bec0911538fe360bfc28d.diff
LOG: [AMDGPU] Fix kill flag on overlapping sgpr copy
Same as on vgpr copies, we cannot kill the source register if it
overlaps with the destination register. Otherwise, the kill of the
source register will also count as a kill for the destination register.
Differential Revision: https://reviews.llvm.org/D120042
Added:
llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9899c36352b84..7e5c9e990d4be 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -930,7 +930,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
- expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
+ const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
+ expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, CanKillSuperReg, RC,
+ Forward);
return;
}
diff --git a/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
new file mode 100644
index 0000000000000..b97a9237a0b97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-overlap-sgpr-kill.mir
@@ -0,0 +1,49 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
+
+# Don't set a kill of the super register on the last instruction with
+# an overlapping copy. This would kill part of the values in the
+# result copies.
+
+---
+name: overlapping_copy_kill_undef_reg_after_copy
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+
+ ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy
+ ; CHECK: liveins: $sgpr30_sgpr31, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; CHECK-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+ renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11
+ renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+ S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7
+
+...
+
+---
+name: nonoverlapping_copy_kill
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+
+ ; CHECK-LABEL: name: nonoverlapping_copy_kill
+ ; CHECK: liveins: $sgpr30_sgpr31, $sgpr3_sgpr4_sgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
+ ; CHECK-NEXT: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
+ ; CHECK-NEXT: $sgpr2 = S_MOV_B32 $sgpr5, implicit killed $sgpr3_sgpr4_sgpr5
+ ; CHECK-NEXT: renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+ ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+ renamable $sgpr0_sgpr1_sgpr2 = COPY killed renamable $sgpr3_sgpr4_sgpr5
+ renamable $sgpr1 = S_ADD_I32 0, $sgpr1, implicit-def $scc
+ S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2
+
+...
More information about the llvm-commits
mailing list