[llvm] [NFC] Reduce fragility of swdev503538-move-to-valu… test (PR #170702)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 08:53:24 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Daniil Fukalov (dfukalov)

<details>
<summary>Changes</summary>

This MIR test was generated from swdev503538-move-to-valu-stack-srd-physreg.ll using: `llc -stop-before=si-fix-sgpr-copies -O0`

The original test was created in PR #<!-- -->120815, but it depends on -O0 AND DAGCombiner, that is switched on by default for -O0. The patch reduces fragility of the test and removes dependency on DAGCombiner.

---
Full diff: https://github.com/llvm/llvm-project/pull/170702.diff


2 Files Affected:

- (removed) llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll (-23) 
- (added) llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir (+221) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
deleted file mode 100644
index f0b3d334af67d..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs=0 -O0 2> %t.err < %s | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t.err
-
-; FIXME: This error will be fixed by supporting arbitrary divergent
-; dynamic allocas by performing a wave umax of the size.
-
-; ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
-
-; CHECK: ; illegal copy v0 to s32
-
-define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) {
-entry:
-  %idx = load i32, ptr addrspace(1) %ptr, align 4
-  %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
-  %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
-  %ld = load i32, ptr addrspace(5) %alloca, align 8
-  call void @llvm.memset.p5.i32(ptr addrspace(5) %alloca, i8 0, i32 2048, i1 false)
-  ret i32 %ld
-}
-
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0
-
-attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
new file mode 100644
index 0000000000000..b21623d02142b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.mir
@@ -0,0 +1,221 @@
+# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -start-before=si-fix-sgpr-copies -O0 -verify-machineinstrs=0 -combiner-disabled=true %s -o - 2> %t.err | FileCheck %s
+# RUN: FileCheck -check-prefix=ERR %s < %t.err
+
+# FIXME: This error will be fixed by supporting arbitrary divergent
+# dynamic allocas by performing a wave umax of the size.
+
+# ERR: error: <unknown>:0:0: in function move_to_valu_assert_srd_is_physreg_swdev503538 i32 (ptr addrspace(1)): illegal VGPR to SGPR copy
+
+# CHECK: ; illegal copy v{{[0-9]+}} to s32
+
+--- |
+  ; ModuleID = 'llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll'
+  source_filename = "llvm/test/CodeGen/AMDGPU/swdev503538-move-to-valu-stack-srd-physreg.ll"
+  target datalayout = "e-m:e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+  target triple = "amdgcn-amd-amdhsa"
+  
+  define i32 @move_to_valu_assert_srd_is_physreg_swdev503538(ptr addrspace(1) %ptr) #0 {
+  entry:
+    %idx = load i32, ptr addrspace(1) %ptr, align 4
+    %zero = extractelement <4 x i32> zeroinitializer, i32 %idx
+    %alloca = alloca [2048 x i8], i32 %zero, align 8, addrspace(5)
+    %ld = load i32, ptr addrspace(5) %alloca, align 8
+    br label %loadstoreloop, !amdgpu.uniform !0
+  
+  loadstoreloop:                                    ; preds = %entry, %loadstoreloop
+    %0 = phi i32 [ %2, %loadstoreloop ], [ 0, %entry ]
+    %1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, i32 %0
+    store i8 0, ptr addrspace(5) %1, align 1
+    %2 = add i32 %0, 1
+    %3 = icmp uge i32 %2, 2048
+    br i1 %3, label %Flow, label %loadstoreloop, !amdgpu.uniform !0
+  
+  Flow:                                             ; preds = %loadstoreloop
+    br label %split, !amdgpu.uniform !0
+  
+  split:                                            ; preds = %Flow
+    ret i32 %ld
+  }
+  
+  ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
+  declare void @llvm.memset.p5.i32(ptr addrspace(5) writeonly captures(none), i8, i32, i1 immarg) #1
+  
+  attributes #0 = { "target-cpu"="gfx90a" }
+  attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) "target-cpu"="gfx90a" }
+  
+  !0 = !{}
+...
+---
+name:            move_to_valu_assert_srd_is_physreg_swdev503538
+alignment:       1
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 1, class: av_32, preferred-register: '', flags: [  ] }
+  - { id: 2, class: sreg_32, preferred-register: '', flags: [  ] }
+  - { id: 3, class: sreg_32, preferred-register: '', flags: [  ] }
+  - { id: 4, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 5, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 6, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 7, class: sgpr_64, preferred-register: '', flags: [  ] }
+  - { id: 8, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 9, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 10, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 11, class: sgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 12, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 13, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 14, class: av_64_align2, preferred-register: '', flags: [  ] }
+  - { id: 15, class: sreg_32, preferred-register: '', flags: [  ] }
+  - { id: 16, class: sreg_64, preferred-register: '', flags: [  ] }
+  - { id: 17, class: av_32, preferred-register: '', flags: [  ] }
+  - { id: 18, class: sreg_32, preferred-register: '', flags: [  ] }
+  - { id: 19, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 20, class: vgpr_32, preferred-register: '', flags: [  ] }
+  - { id: 21, class: sreg_32, preferred-register: '', flags: [  ] }
+  - { id: 22, class: sreg_32, preferred-register: '', flags: [  ] }
+liveins:
+  - { reg: '$vgpr0', virtual-reg: '%12' }
+  - { reg: '$vgpr1', virtual-reg: '%13' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+fixedStack:      []
+stack:
+  - { id: 0, name: alloca, type: variable-sized, offset: 0, alignment: 1, 
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  explicitKernArgSize: 0
+  maxKernArgAlign: 1
+  ldsSize:         0
+  gdsSize:         0
+  dynLDSAlign:     1
+  isEntryFunction: false
+  isChainFunction: false
+  noSignedZerosFPMath: false
+  memoryBound:     false
+  waveLimiter:     false
+  hasSpilledSGPRs: false
+  hasSpilledVGPRs: false
+  numWaveDispatchSGPRs: 16
+  numWaveDispatchVGPRs: 2
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  bytesInStackArgArea: 0
+  returnsVoid:     false
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr:     { reg: '$sgpr4_sgpr5' }
+    queuePtr:        { reg: '$sgpr6_sgpr7' }
+    dispatchID:      { reg: '$sgpr10_sgpr11' }
+    workGroupIDX:    { reg: '$sgpr12' }
+    workGroupIDY:    { reg: '$sgpr13' }
+    workGroupIDZ:    { reg: '$sgpr14' }
+    LDSKernelId:     { reg: '$sgpr15' }
+    implicitArgPtr:  { reg: '$sgpr8_sgpr9' }
+    workItemIDX:     { reg: '$vgpr31', mask: 1023 }
+    workItemIDY:     { reg: '$vgpr31', mask: 1047552 }
+    workItemIDZ:     { reg: '$vgpr31', mask: 1072693248 }
+  psInputAddr:     0
+  psInputEnable:   0
+  maxMemoryClusterDWords: 8
+  mode:
+    ieee:            true
+    dx10-clamp:      true
+    fp32-input-denormals: true
+    fp32-output-denormals: true
+    fp64-fp16-input-denormals: true
+    fp64-fp16-output-denormals: true
+  highBitsOf32BitAddress: 0
+  occupancy:       8
+  vgprForAGPRCopy: ''
+  sgprForEXECCopy: ''
+  longBranchReservedReg: ''
+  hasInitWholeWave: false
+  dynamicVGPRBlockSize: 0
+  scratchReservedForDynamicVGPRs: 0
+  numKernargPreloadSGPRs: 0
+  isWholeWaveFunction: false
+body:             |
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $vgpr0, $vgpr1
+  
+    %13:vgpr_32 = COPY $vgpr1
+    %12:vgpr_32 = COPY $vgpr0
+    %16:sreg_64 = REG_SEQUENCE %12, %subreg.sub0, %13, %subreg.sub1
+    %14:av_64_align2 = COPY %16
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc
+    %17:av_32 = COPY $sgpr32
+    $sgpr32 = COPY %17
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc
+    %0:vgpr_32 = COPY %17
+    %18:sreg_32 = COPY %17
+    %1:av_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, %18, 0, 0, 0, implicit $exec :: (load (s32) from %ir.alloca, align 8, addrspace 5)
+    %15:sreg_32 = S_MOV_B32 0
+    S_BRANCH %bb.1
+  
+  bb.1.loadstoreloop:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  
+    %2:sreg_32 = PHI %15, %bb.0, %3, %bb.1
+    %19:vgpr_32 = V_ADD_U32_e64 %0, %2, 0, implicit $exec
+    %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    BUFFER_STORE_BYTE_OFFEN killed %20, killed %19, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.1, addrspace 5)
+    %21:sreg_32 = S_MOV_B32 1
+    %3:sreg_32 = S_ADD_I32 %2, killed %21, implicit-def dead $scc
+    %22:sreg_32 = S_MOV_B32 2048
+    S_CMP_LT_U32 %3, killed %22, implicit-def $scc
+    S_CBRANCH_SCC1 %bb.1, implicit $scc
+    S_BRANCH %bb.2
+  
+  bb.2.Flow:
+    successors: %bb.3(0x80000000)
+  
+    S_BRANCH %bb.3
+  
+  bb.3.split:
+    $vgpr0 = COPY %1
+    SI_RETURN implicit $vgpr0
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/170702


More information about the llvm-commits mailing list