[PATCH] D130622: [AMDGPU][SIFoldOperands] Clear kills when folding COPY

Carl Ritson via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 27 20:01:50 PDT 2022


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
critson marked an inline comment as done.
Closed by commit rGdbda30e2947b: [AMDGPU][SIFoldOperands] Clear kills when folding COPY (authored by critson).

Changed prior to commit:
  https://reviews.llvm.org/D130622?vs=448023&id=448227#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D130622/new/

https://reviews.llvm.org/D130622

Files:
  llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
  llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir


Index: llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/si-fold-copy-kills.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
+
+---
+name: fold_reg_kill
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: fold_reg_kill
+    ; GCN: liveins: $sgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY]], implicit-def $scc
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[COPY1]]
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = COPY %0
+    %2:sreg_32 = S_ADD_U32 killed %0, killed %0, implicit-def $scc
+    %3:sreg_32 = COPY %1
+    S_ENDPGM 0, implicit %2, implicit %3
+...
+
+---
+name:            fold_subreg_kill
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: fold_subreg_kill
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT:   liveins: $sgpr0_sgpr1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
+  ; GCN-NEXT:   [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (load (s128), align 4, addrspace 4)
+  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
+  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub1
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+  ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+  ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY3]], %subreg.sub0, killed [[COPY4]], %subreg.sub1, killed [[COPY5]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET [[DEF]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    %0:sgpr_64(p4) = COPY $sgpr0_sgpr1
+    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0:sgpr_64(p4), 9, 0 :: (load (s128), align 4, addrspace 4)
+    %2:sreg_64_xexec = COPY %1.sub0_sub1:sgpr_128
+    %3:sreg_64_xexec = COPY killed %1.sub2_sub3:sgpr_128
+    %4:sreg_64 = COPY %2:sreg_64_xexec
+    %5:sreg_32 = COPY %3.sub1:sreg_64_xexec
+
+  bb.1:
+    %6:sreg_32 = COPY %4.sub1:sreg_64
+    %7:sreg_32 = COPY %4.sub0:sreg_64
+    %8:sreg_32 = COPY %5:sreg_32
+    %9:sreg_32 = S_MOV_B32 -1
+    %10:sgpr_128 = REG_SEQUENCE killed %6:sreg_32, %subreg.sub0, killed %7:sreg_32, %subreg.sub1, killed %8:sreg_32, %subreg.sub2, killed %9:sreg_32, %subreg.sub3
+    %11:vgpr_32 = IMPLICIT_DEF
+    BUFFER_STORE_DWORD_OFFSET %11:vgpr_32, killed %10:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 1)
+...
+
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -737,6 +737,9 @@
       CopiesToReplace.push_back(UseMI);
       OpToFold.setIsKill(false);
 
+      // Remove kill flags as kills may now be out of order with uses.
+      MRI->clearKillFlags(OpToFold.getReg());
+
       // That is very tricky to store a value into an AGPR. v_accvgpr_write_b32
       // can only accept VGPR or inline immediate. Recreate a reg_sequence with
       // its initializers right here, so we will rematerialize immediates and


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D130622.448227.patch
Type: text/x-patch
Size: 3935 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220728/ab6ca5c3/attachment.bin>


More information about the llvm-commits mailing list