[llvm] ce16b68 - AMDGPU: Don't kill super-register with overlapping copy

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 16 06:46:21 PDT 2020


Author: Matt Arsenault
Date: 2020-10-16T09:34:35-04:00
New Revision: ce16b6835bce18989e1dc0796305fe703e59ca4d

URL: https://github.com/llvm/llvm-project/commit/ce16b6835bce18989e1dc0796305fe703e59ca4d
DIFF: https://github.com/llvm/llvm-project/commit/ce16b6835bce18989e1dc0796305fe703e59ca4d.diff

LOG: AMDGPU: Don't kill super-register with overlapping copy

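When the source and destination of a multi-register copy overlap,
marking the source super-register as killed on the last sub-register
copy would also kill the part of the result super-register that
overlaps it, resulting in a verifier error on a later use of the
overlapping registers.  We could instead add kills of only the
non-aliasing registers, but we should be moving away from relying on
kill flags.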

Added: 
    llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 52b14781bf73..77ed364dedcb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -919,6 +919,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, 4);
 
+  // If there is an overlap, we can't kill the super-register on the last
+  // instruction, since it will also kill the components made live by this def.
+  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
+
   for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
     unsigned SubIdx;
     if (Forward)
@@ -926,7 +930,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     else
       SubIdx = SubIndices[SubIndices.size() - Idx - 1];
 
-    bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
+    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;
 
     if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
       Register ImpDefSuper = Idx == 0 ? Register(DestReg) : Register();

diff  --git a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
index de839d9253e8..7fa1d02439e1 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-copy.mir
@@ -26,7 +26,9 @@
 
     define amdgpu_kernel void @a_to_a() #0 { ret void }
     define amdgpu_kernel void @a2_to_a2() #0 { ret void }
-    define amdgpu_kernel void @a3_to_a3() #0 { ret void }
+    define amdgpu_kernel void @a2_to_a2_kill() #0 { ret void }
+    define amdgpu_kernel void @a3_to_a3_nonoverlap_kill() #0 { ret void }
+    define amdgpu_kernel void @a3_to_a3_overlap_kill() #0 { ret void }
     define amdgpu_kernel void @a4_to_a4() #0 { ret void }
     define amdgpu_kernel void @a4_to_a4_overlap() #0 { ret void }
     define amdgpu_kernel void @a8_to_a8() #0 { ret void }
@@ -455,39 +457,61 @@ body:             |
 ...
 
 ---
-name:            a2_to_a2
+name:            a2_to_a2_kill
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: a2_to_a2
-    ; GCN: $agpr0_agpr1 = IMPLICIT_DEF
+    liveins: $agpr0_agpr1
+    ; GCN-LABEL: name: a2_to_a2_kill
+    ; GCN: liveins: $agpr0_agpr1
     ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit-def $agpr1_agpr2
-    ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1
+    ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr1_agpr2
+    ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1
     ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit $agpr1_agpr2
-    $agpr0_agpr1 = IMPLICIT_DEF
+    ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
     $agpr1_agpr2 = COPY killed $agpr0_agpr1, implicit $exec
-    S_ENDPGM 0, implicit $agpr1_agpr2
+    $agpr3 = COPY $agpr2
+    S_ENDPGM 0, implicit $agpr1, implicit $agpr2, implicit $agpr3
 ...
 
 ---
-name:            a3_to_a3
+name:            a3_to_a3_nonoverlap_kill
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: a3_to_a3
-    ; GCN: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit-def $agpr2_agpr3_agpr4
-    ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
-    ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2
-    ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $exec
-    ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4
-    $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    $agpr2_agpr3_agpr4 = COPY killed $agpr0_agpr1_agpr2, implicit $exec
-    S_ENDPGM 0, implicit $agpr2_agpr3_agpr4
+    liveins: $agpr4_agpr5_agpr6
+    ; GCN-LABEL: name: a3_to_a3_nonoverlap_kill
+    ; GCN: liveins: $agpr4_agpr5_agpr6
+    ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit $agpr4_agpr5_agpr6
+    ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
+    ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr5, implicit $exec, implicit $agpr4_agpr5_agpr6
+    ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec
+    ; GCN: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr6, implicit $exec, implicit killed $agpr4_agpr5_agpr6
+    ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
+    $agpr0_agpr1_agpr2 = COPY killed $agpr4_agpr5_agpr6
+    S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
+...
+
+---
+name:            a3_to_a3_overlap_kill
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $agpr1_agpr2_agpr3
+    ; GCN-LABEL: name: a3_to_a3_overlap_kill
+    ; GCN: liveins: $agpr1_agpr2_agpr3
+    ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr1_agpr2_agpr3
+    ; GCN: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
+    ; GCN: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit $agpr1_agpr2_agpr3
+    ; GCN: $vgpr4 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit $agpr1_agpr2_agpr3
+    ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec
+    ; GCN: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec
+    ; GCN: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
+    $agpr0_agpr1_agpr2 = COPY killed $agpr1_agpr2_agpr3
+    $vgpr1 = COPY $agpr1
+    S_ENDPGM 0, implicit $agpr0_agpr1_agpr2, implicit $vgpr1
 ...
 
 ---
@@ -502,7 +526,7 @@ body:             |
     ; GCN: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN: $vgpr0 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec
-    ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3
+    ; GCN: $vgpr3 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $exec
     ; GCN: S_ENDPGM 0, implicit $agpr2_agpr3_agpr4_agpr5
     $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir b/llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir
new file mode 100644
index 000000000000..a32b65b4915b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-overlap-vgpr-kill.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
+
+# Don't set a kill of the super register on the last instruction with
+# an overlapping copy. This would kill part of the values in the
+# result copies.
+
+---
+name: overlapping_copy_kill_undef_reg_after_copy
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $vgpr1_vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy
+    ; CHECK: liveins: $sgpr30_sgpr31, $vgpr1_vgpr2_vgpr3
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr1_vgpr2_vgpr3
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit $vgpr1_vgpr2_vgpr3
+    ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr1_vgpr2_vgpr3
+    ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+    renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr1_vgpr2_vgpr3
+    renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+
+...
+
+---
+name: overlapping_copy_kill_undef_reg_after_copy_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4
+
+    ; CHECK-LABEL: name: overlapping_copy_kill_undef_reg_after_copy_1
+    ; CHECK: liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr2_vgpr3_vgpr4
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4
+    ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4
+    ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+    renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr2_vgpr3_vgpr4
+    renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+
+...
+
+---
+name: nonoverlapping_copy_kill
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: nonoverlapping_copy_kill
+    ; CHECK: liveins: $sgpr30_sgpr31, $vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit killed $vgpr3_vgpr4_vgpr5
+    ; CHECK: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+    renamable $vgpr0_vgpr1_vgpr2 = COPY killed renamable $vgpr3_vgpr4_vgpr5
+    renamable $vgpr1 = nofpexcept V_MUL_F32_e32 0, $vgpr1, implicit $mode, implicit $exec
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+
+...
+
+---
+name: overlapping_copy_kill_half_s128
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: overlapping_copy_kill_half_s128
+    ; CHECK: liveins: $sgpr30_sgpr31, $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr2 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: $vgpr3 = V_MOV_B32_e32 $vgpr5, implicit $exec, implicit $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: renamable $vgpr1 = V_OR_B32_e32 1, $vgpr1, implicit $exec
+    ; CHECK: S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+    renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $vgpr2_vgpr3_vgpr4_vgpr5
+    renamable $vgpr1 = V_OR_B32_e32 1, $vgpr1, implicit $exec
+    S_SETPC_B64 $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+
+...


        


More information about the llvm-commits mailing list