[llvm] [AMDGPU] Inplace FI elimination during PEI for scalar copy instruction (PR #99556)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 04:31:39 PDT 2024


================
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+  target triple = "amdgcn-amd-amdhsa"
+
+  declare double @killer()
+  define void @bug() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
+
+...
+---
+name:            bug
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: default, offset: 8, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 8, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 2, name: '', type: default, offset: 24, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: 24, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+    ; CHECK-LABEL: name: bug
+    ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+    ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
+    ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+    ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+    ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
+    ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+    ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+    ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+    ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
+    ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+    ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+    ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+    ; CHECK-NEXT: SI_RETURN
+    renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+    renamable $sgpr17 = S_MOV_B32 0
+    undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+    renamable $sgpr24 = S_MOV_B32 %stack.0
+    renamable $sgpr29 = COPY undef renamable $sgpr30
+    renamable $sgpr20 = S_MOV_B32 %stack.1
+    undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+    undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+    renamable $sgpr31 = S_MOV_B32 %stack.2
+    renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+    renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+    dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
----------------
arsenm wrote:

Replace the function reference (`@killer`) in the `SI_CALL` above with 0.

https://github.com/llvm/llvm-project/pull/99556


More information about the llvm-commits mailing list