[llvm] [AMDGPU] Inplace FI elimination during PEI for scalar copy instruction (PR #99556)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 04:31:39 PDT 2024
================
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+ target triple = "amdgcn-amd-amdhsa"
+
+ declare double @killer()
+ define void @bug() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-stack-objects" "target-cpu"="gfx90a" }
+
+...
+---
+name: bug
+tracksRegLiveness: true
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 8, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 2, name: '', type: default, offset: 24, size: 8, alignment: 8,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 24, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+
+machineFunctionInfo:
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+body: |
+ bb.0:
+ liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-LABEL: name: bug
+ ; CHECK: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr31, $vgpr40, $vgpr63, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+ ; CHECK-NEXT: renamable $sgpr17 = S_MOV_B32 0
+ ; CHECK-NEXT: undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ ; CHECK-NEXT: $sgpr24 = S_LSHR_B32 $sgpr32, 6, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr29 = COPY undef renamable $sgpr30
+ ; CHECK-NEXT: $sgpr20 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr20 = S_ADD_I32 killed $sgpr20, 4, implicit-def $scc
+ ; CHECK-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+ ; CHECK-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+ ; CHECK-NEXT: $sgpr31 = S_LSHR_B32 $sgpr32, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr31 = S_ADD_I32 killed $sgpr31, 8, implicit-def $scc
+ ; CHECK-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+ ; CHECK-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+ ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
+ ; CHECK-NEXT: SI_RETURN
+ renamable $sgpr18_sgpr19 = S_MOV_B64 $src_private_base
+ renamable $sgpr17 = S_MOV_B32 0
+ undef renamable $vcc_lo = COPY undef renamable $sgpr17, implicit-def $vcc
+ renamable $sgpr24 = S_MOV_B32 %stack.0
+ renamable $sgpr29 = COPY undef renamable $sgpr30
+ renamable $sgpr20 = S_MOV_B32 %stack.1
+ undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23
+ undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27
+ renamable $sgpr31 = S_MOV_B32 %stack.2
+ renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr26_sgpr27, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr24_sgpr25, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr22_sgpr23, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr20_sgpr21, implicit $exec
+ renamable $vgpr0_vgpr1 = COPY killed renamable $sgpr18_sgpr19, implicit $exec
+ dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @killer, csr_amdgpu_gfx90ainsts, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1
----------------
arsenm wrote:
Replace the function reference (`@killer`) in the SI_CALL operand above with 0.
https://github.com/llvm/llvm-project/pull/99556
More information about the llvm-commits mailing list