[llvm] [NFC] Reduce fragility of swdev503538-move-to-valu… test (PR #170702)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 08:53:24 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Daniil Fukalov (dfukalov)
<details>
<summary>Changes</summary>
This MIR test was generated from swdev503538-move-to-valu-stack-srd-physreg.ll using: `llc -stop-before=si-fix-sgpr-copies -O0`
The original test was created in PR #<!-- -->120815, but it depends on both -O0 and the DAGCombiner, which is enabled by default at -O0. This patch reduces the fragility of the test and removes its dependency on the DAGCombiner.
---
Full diff: https://github.com/llvm/llvm-project/pull/170702.diff
2 Files Affected:
- (removed) llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll (-23)
- (added) llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir (+221)
``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
deleted file mode 100644
index f0b3d334af67d..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 -O0 2> %t.err < %s | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t.err
-
-; FIXME: This error will be fixed by supporting arbitrary divergent
-; dynamic allocas by performing a wave umax of the size.
-
-; ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
-
-; CHECK: ; illegal copy v0 to s32
-
-define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) {
-entry:
- %idx = load i32, ptr addrspace(1) %ptr, align 4
- %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
- %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
- %ld = load i32, ptr addrspace(5) %alloca, align 8
- call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 2048, i1 false)
- ret i32 %ld
-}
-
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
new file mode 100644
index 0000000000000..b21623d02142b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
@@ -0,0 +1,221 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -start-before=si-fix-sgpr-copies -O0 -verify-machineinstrs=0 -combiner-disabled=true %s -o - 2> %t.err | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# FIXME: This error will be fixed by supporting arbitrary divergent
+# dynamic allocas by performing a wave umax of the size.
+
+# ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
+
+# CHECK: ; illegal copy v{{[0-9]+}} to s32
+
+--- |
+ ; ModuleID = 'llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll'
+ source_filename = "llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll"
+ target datalayout = "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdhsa"
+
+ define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) #0 {
+ entry:
+ %idx = load i32, ptr addrspace(1) %ptr, align 4
+ %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
+ %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
+ %ld = load i32, ptr addrspace(5) %alloca, align 8
+ br label %loadstoreloop, !amdgpu.uniform !0
+
+ loadstoreloop: ; preds = %entry, %loadstoreloop
+ %0 = phi i32 [ %2, %loadstoreloop ], [ 0, %entry ]
+ %1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, i32 %0
+ store i8 0, ptr addrspace(5) %1, align 1
+ %2 = add i32 %0, 1
+ %3 = icmp uge i32 %2, 2048
+ br i1 %3, label %Flow, label %loadstoreloop, !amdgpu.uniform !0
+
+ Flow: ; preds = %loadstoreloop
+ br label %split, !amdgpu.uniform !0
+
+ split: ; preds = %Flow
+ ret i32 %ld
+ }
+
+ ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+ declare void @llvm.memset.p5.i32(ptr addrspace(5) writeonly captures(none), i8, i32, i1 immarg) #1
+
+ attributes #0 = { "target-cpu"="gfx90a" }
+ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) "target-cpu"="gfx90a" }
+
+ !0 = !{}
+...
+---
+name: move_to_valu_assert_srd_is_physreg_swdev503538
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 1, class: av_32, preferred-register: '', flags: [ ] }
+ - { id: 2, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 3, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 4, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 5, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 6, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 7, class: sgpr_64, preferred-register: '', flags: [ ] }
+ - { id: 8, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 9, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 10, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 11, class: sgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 13, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 14, class: av_64_align2, preferred-register: '', flags: [ ] }
+ - { id: 15, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 16, class: sreg_64, preferred-register: '', flags: [ ] }
+ - { id: 17, class: av_32, preferred-register: '', flags: [ ] }
+ - { id: 18, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 19, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 20, class: vgpr_32, preferred-register: '', flags: [ ] }
+ - { id: 21, class: sreg_32, preferred-register: '', flags: [ ] }
+ - { id: 22, class: sreg_32, preferred-register: '', flags: [ ] }
+liveins:
+ - { reg: '$vgpr0', virtual-reg: '%12' }
+ - { reg: '$vgpr1', virtual-reg: '%13' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+fixedStack: []
+stack:
+ - { id: 0, name: alloca, type: variable-sized, offset: 0, alignment: 1,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 1
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: false
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: false
+ hasSpilledVGPRs: false
+ numWaveDispatchSGPRs: 16
+ numWaveDispatchVGPRs: 2
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ frameOffsetReg: '$sgpr33'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: false
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ dispatchPtr: { reg: '$sgpr4_sgpr5' }
+ queuePtr: { reg: '$sgpr6_sgpr7' }
+ dispatchID: { reg: '$sgpr10_sgpr11' }
+ workGroupIDX: { reg: '$sgpr12' }
+ workGroupIDY: { reg: '$sgpr13' }
+ workGroupIDZ: { reg: '$sgpr14' }
+ LDSKernelId: { reg: '$sgpr15' }
+ implicitArgPtr: { reg: '$sgpr8_sgpr9' }
+ workItemIDX: { reg: '$vgpr31', mask: 1023 }
+ workItemIDY: { reg: '$vgpr31', mask: 1047552 }
+ workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: true
+ dx10-clamp: true
+ fp32-input-denormals: true
+ fp32-output-denormals: true
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 8
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: ''
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+ numKernargPreloadSGPRs: 0
+ isWholeWaveFunction: false
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $vgpr0, $vgpr1
+
+ %13:vgpr_32 = COPY $vgpr1
+ %12:vgpr_32 = COPY $vgpr0
+ %16:sreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+ %14:av_64_align2 = COPY %16
+ ADJCALLSTACKUP 0, 0, implicit-def dead $scc
+ %17:av_32 = COPY $sgpr32
+ $sgpr32 = COPY %17
+ ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc
+ %0:vgpr_32 = COPY %17
+ %18:sreg_32 = COPY %17
+ %1:av_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, %18, 0, 0, 0, implicit $exec :: (load (s32) from %ir.alloca, align 8, addrspace 5)
+ %15:sreg_32 = S_MOV_B32 0
+ S_BRANCH %bb.1
+
+ bb.1.loadstoreloop:
+ successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+ %2:sreg_32 = PHI %15, %bb.0, %3, %bb.1
+ %19:vgpr_32 = V_ADD_U32_e64 %0, %2, 0, implicit $exec
+ %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ BUFFER_STORE_BYTE_OFFEN killed %20, killed %19, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.1, addrspace 5)
+ %21:sreg_32 = S_MOV_B32 1
+ %3:sreg_32 = S_ADD_I32 %2, killed %21, implicit-def dead $scc
+ %22:sreg_32 = S_MOV_B32 2048
+ S_CMP_LT_U32 %3, killed %22, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.2.Flow:
+ successors: %bb.3(0x80000000)
+
+ S_BRANCH %bb.3
+
+ bb.3.split:
+ $vgpr0 = COPY %1
+ SI_RETURN implicit $vgpr0
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/170702
More information about the llvm-commits mailing list