[llvm] 984451e - PostRAPseudos: Don't preserve kills on some implicit copy operands

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 18 10:52:10 PST 2022


Author: Matt Arsenault
Date: 2022-01-18T13:52:04-05:00
New Revision: 984451eafcedf80ce2db70b49757860992e723dc

URL: https://github.com/llvm/llvm-project/commit/984451eafcedf80ce2db70b49757860992e723dc
DIFF: https://github.com/llvm/llvm-project/commit/984451eafcedf80ce2db70b49757860992e723dc.diff

LOG: PostRAPseudos: Don't preserve kills on some implicit copy operands

This fixes a verifier error I ran into at -O0. A subregister copy had
an implicit kill of an overlapping superregister, which was partially
redefined by the copy. The preserved implicit operand killed
subregisters made live earlier in the sequence. AMDGPU already uses
similar logic for whether to preserve the kill of the superregister on
the final instruction if there's overlap.

Added: 
    llvm/test/CodeGen/AMDGPU/copy-phys-reg-implicit-operand-kills-subregs.mir
    llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll

Modified: 
    llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
    llvm/test/CodeGen/X86/pr28560.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 7300ea6b50ee3..d9caa8ad42d0b 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -68,9 +68,16 @@ void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
   MachineBasicBlock::iterator CopyMI = MI;
   --CopyMI;
 
-  for (const MachineOperand &MO : MI->implicit_operands())
-    if (MO.isReg())
-      CopyMI->addOperand(MO);
+  Register DstReg = MI->getOperand(0).getReg();
+  for (const MachineOperand &MO : MI->implicit_operands()) {
+    CopyMI->addOperand(MO);
+
+    // Be conservative about preserving kills when subregister defs are
+    // involved. An implicit kill of a super-register overlapping the copy
+    // result would kill the subregisters that previous copies defined.
+    if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
+      CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
+  }
 }
 
 bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {

diff  --git a/llvm/test/CodeGen/AMDGPU/copy-phys-reg-implicit-operand-kills-subregs.mir b/llvm/test/CodeGen/AMDGPU/copy-phys-reg-implicit-operand-kills-subregs.mir
new file mode 100644
index 0000000000000..9376a4c59c170
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-phys-reg-implicit-operand-kills-subregs.mir
@@ -0,0 +1,24 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs -run-pass=postrapseudos -o - %s | FileCheck %s
+
+# The copy has an implicit kill of a superregister which overlaps the
+# register it defines. We cannot preserve the kill on the tuple use
+# when copying implicit operands to the last inserted v_mov_b32, since
+# it would kill the subregister defined earlier in the expansion.
+
+---
+name: copy_has_implicit_kill_superreg
+tracksRegLiveness: true
+body:             |
+  bb.0:
+
+    ; CHECK-LABEL: name: copy_has_implicit_kill_superreg
+    ; CHECK: renamable $vgpr7_vgpr8_vgpr9_vgpr10 = IMPLICIT_DEF
+    ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit-def $vgpr7_vgpr8, implicit $vgpr10_vgpr11
+    ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr10_vgpr11, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+    ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr7
+    renamable $vgpr7_vgpr8_vgpr9_vgpr10 = IMPLICIT_DEF
+    renamable $vgpr7_vgpr8 = COPY killed renamable $vgpr10_vgpr11, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10
+    S_ENDPGM 0, implicit $vgpr7
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll b/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
new file mode 100644
index 0000000000000..a6c70a9d7c436
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/overlapping-tuple-copy-implicit-op-failure.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s
+
+; Testcase which happened to trigger a liveness verifier error
+define amdgpu_kernel void @test_long_add4(<4 x i64> %arg) #0 {
+; CHECK-LABEL: test_long_add4:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    global_load_dwordx4 v[7:10], v[0:1], off
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:16
+; CHECK-NEXT:    ; kill: def $vgpr7_vgpr8_vgpr9_vgpr10 killed $vgpr7_vgpr8_vgpr9_vgpr10 def $vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 killed $exec
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v14, v3
+; CHECK-NEXT:    v_mov_b32_e32 v13, v2
+; CHECK-NEXT:    v_mov_b32_e32 v12, v1
+; CHECK-NEXT:    v_mov_b32_e32 v11, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    global_load_dwordx4 v[18:21], v[0:1], off
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    v_mov_b32_e32 v1, s5
+; CHECK-NEXT:    global_load_dwordx4 v[0:3], v[0:1], off offset:16
+; CHECK-NEXT:    ; kill: def $vgpr18_vgpr19_vgpr20_vgpr21 killed $vgpr18_vgpr19_vgpr20_vgpr21 def $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 killed $exec
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v25, v3
+; CHECK-NEXT:    v_mov_b32_e32 v24, v2
+; CHECK-NEXT:    v_mov_b32_e32 v23, v1
+; CHECK-NEXT:    v_mov_b32_e32 v22, v0
+; CHECK-NEXT:    v_mov_b32_e32 v2, v7
+; CHECK-NEXT:    v_mov_b32_e32 v3, v8
+; CHECK-NEXT:    v_mov_b32_e32 v0, v9
+; CHECK-NEXT:    v_mov_b32_e32 v1, v10
+; CHECK-NEXT:    v_mov_b32_e32 v5, v11
+; CHECK-NEXT:    v_mov_b32_e32 v6, v12
+; CHECK-NEXT:    v_mov_b32_e32 v12, v13
+; CHECK-NEXT:    v_mov_b32_e32 v13, v14
+; CHECK-NEXT:    v_mov_b32_e32 v8, v18
+; CHECK-NEXT:    v_mov_b32_e32 v9, v19
+; CHECK-NEXT:    v_mov_b32_e32 v16, v20
+; CHECK-NEXT:    v_mov_b32_e32 v17, v21
+; CHECK-NEXT:    v_mov_b32_e32 v14, v22
+; CHECK-NEXT:    v_mov_b32_e32 v15, v23
+; CHECK-NEXT:    v_mov_b32_e32 v10, v24
+; CHECK-NEXT:    v_mov_b32_e32 v11, v25
+; CHECK-NEXT:    v_mov_b32_e32 v4, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, v3
+; CHECK-NEXT:    v_mov_b32_e32 v7, v8
+; CHECK-NEXT:    v_mov_b32_e32 v3, v9
+; CHECK-NEXT:    v_add_co_u32 v7, s6, v4, v7
+; CHECK-NEXT:    v_add_co_ci_u32_e64 v2, s6, v2, v3, s6
+; CHECK-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v8, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v0, v1
+; CHECK-NEXT:    v_mov_b32_e32 v3, v16
+; CHECK-NEXT:    v_mov_b32_e32 v1, v17
+; CHECK-NEXT:    v_add_co_u32 v3, s6, v2, v3
+; CHECK-NEXT:    v_add_co_ci_u32_e64 v0, s6, v0, v1, s6
+; CHECK-NEXT:    ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v4, v0
+; CHECK-NEXT:    v_mov_b32_e32 v1, v5
+; CHECK-NEXT:    v_mov_b32_e32 v0, v6
+; CHECK-NEXT:    v_mov_b32_e32 v5, v14
+; CHECK-NEXT:    v_mov_b32_e32 v2, v15
+; CHECK-NEXT:    v_add_co_u32 v1, s6, v1, v5
+; CHECK-NEXT:    v_add_co_ci_u32_e64 v0, s6, v0, v2, s6
+; CHECK-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v2, v0
+; CHECK-NEXT:    v_mov_b32_e32 v5, v12
+; CHECK-NEXT:    v_mov_b32_e32 v0, v13
+; CHECK-NEXT:    v_mov_b32_e32 v9, v10
+; CHECK-NEXT:    v_mov_b32_e32 v6, v11
+; CHECK-NEXT:    v_add_co_u32 v5, s6, v5, v9
+; CHECK-NEXT:    v_add_co_ci_u32_e64 v0, s6, v0, v6, s6
+; CHECK-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v6, v0
+; CHECK-NEXT:    ; kill: def $vgpr7_vgpr8 killed $vgpr7_vgpr8 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v10, v4
+; CHECK-NEXT:    v_mov_b32_e32 v9, v3
+; CHECK-NEXT:    ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
+; CHECK-NEXT:    v_mov_b32_e32 v3, v5
+; CHECK-NEXT:    v_mov_b32_e32 v4, v6
+; CHECK-NEXT:    v_mov_b32_e32 v6, s5
+; CHECK-NEXT:    v_mov_b32_e32 v5, s4
+; CHECK-NEXT:    global_store_dwordx4 v[5:6], v[7:10], off
+; CHECK-NEXT:    s_mov_b64 s[4:5], 16
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    global_store_dwordx4 v0, v[1:4], s[4:5]
+; CHECK-NEXT:    s_endpgm
+entry:
+  %load0 = load <4 x i64>, <4 x i64> addrspace(1)* null, align 32
+  %load1 = load <4 x i64>, <4 x i64> addrspace(1)* null, align 32
+  %add = add <4 x i64> %load0, %load1
+  store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
+  ret void
+}
+
+attributes #0 = { noinline optnone }

diff  --git a/llvm/test/CodeGen/X86/pr28560.ll b/llvm/test/CodeGen/X86/pr28560.ll
index 44964cc122a63..e66db955b68b9 100644
--- a/llvm/test/CodeGen/X86/pr28560.ll
+++ b/llvm/test/CodeGen/X86/pr28560.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=i686-pc-linux -print-after=postrapseudos < %s 2>&1 | FileCheck %s
 
-; CHECK: MOV8rr ${{[a-d]}}l, implicit killed $e[[R:[a-d]]]x, implicit-def $e[[R]]x
+; CHECK: MOV8rr ${{[a-d]}}l, implicit $e[[R:[a-d]]]x, implicit-def $e[[R]]x
 define i32 @foo(i32 %i, i32 %k, i8* %p) {
   %f = icmp ne i32 %i, %k
   %s = zext i1 %f to i8


        


More information about the llvm-commits mailing list