[llvm] [AMDGPU] SIFixSgprCopies should not twice process VGPR to SGPR copies inserted by PHI preprocessing. (PR #134153)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 9 12:19:02 PDT 2025
================
@@ -0,0 +1,205 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=GCN %s
+--- |
+ source_filename = "test1.ll"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn"
+
+ define amdgpu_kernel void @test_should_convert_to_v_readfirstlane_b32(float %fval, i32 %arg1, i32 %arg2, ptr addrspace(4) %out) #0 {
+ entry:
+ %test_should_convert_to_v_readfirstlane_b32.kernarg.segment = call nonnull align 16 dereferenceable(280) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %fval.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %test_should_convert_to_v_readfirstlane_b32.kernarg.segment, i64 36, !amdgpu.uniform !0
+ %0 = load <3 x i32>, ptr addrspace(4) %fval.kernarg.offset, align 4, !invariant.load !0
+ %fval.load1 = extractelement <3 x i32> %0, i32 0
+ %1 = bitcast i32 %fval.load1 to float
+ %arg1.load2 = extractelement <3 x i32> %0, i32 1
+ %arg2.load3 = extractelement <3 x i32> %0, i32 2
+ %conv = fptoui float %1 to i32
+ %shl = shl i32 %conv, 16
+ %or = or i32 %shl, %arg1.load2
+ %and = and i32 %or, %arg2.load3
+ %shr = lshr i32 %and, 2
+ %sgpr128_0 = insertelement <4 x i32> undef, i32 %shr, i32 0
+ %sgpr128_1 = insertelement <4 x i32> %sgpr128_0, i32 %or, i32 1
+ %sgpr128_2 = insertelement <4 x i32> %sgpr128_1, i32 %and, i32 2
+ %sgpr128_3 = insertelement <4 x i32> %sgpr128_2, i32 %shr, i32 3
+ call void @llvm.amdgcn.raw.buffer.store.i32(i32 %arg1.load2, <4 x i32> %sgpr128_3, i32 0, i32 0, i32 2)
+ ret void
+ }
+
+ declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #1
+
+ declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
+
+ attributes #0 = { "target-cpu"="gfx942" }
+ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx942" }
+ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+ !0 = !{}
+...
+---
+name: test_should_convert_to_v_readfirstlane_b32
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 2, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 4, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 5, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 6, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 7, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 8, class: sgpr_128, preferred-register: '', flags: [ ] }
+ - { id: 9, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 10, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 11, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 13, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 14, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 15, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 16, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 17, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 18, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 19, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 20, class: sgpr_128, preferred-register: '', flags: [ ] }
+ - { id: 21, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 22, class: vgpr_32, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$sgpr4_sgpr5', virtual-reg: '%3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
----------------
arsenm wrote:
```suggestion
```
https://github.com/llvm/llvm-project/pull/134153
More information about the llvm-commits mailing list