[llvm] [AMDGPU] Fixed llvm-debuginfo-analyzer for AMDGPU. (PR #145125)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 21:01:03 PDT 2025
================
@@ -0,0 +1,210 @@
+# RUN: llc %s -o - -mcpu=gfx1030 -O0 -run-pass=si-pre-allocate-wwm-regs | FileCheck %s
+
+# Simple regression test to make sure DBG_VALUE $noreg does not assert in the pass
+
+# CHECK: S_ENDPGM
+
+--- |
+ source_filename = "module"
+ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+ target triple = "amdgcn-amd-amdpal"
+
+ %dx.types.ResRet.f32 = type { float, float, float, float, i32 }
+
+ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg noundef %globalTable, i32 inreg noundef %userdata4, <3 x i32> inreg noundef %WorkgroupId, i32 inreg noundef %MultiDispatchInfo, <3 x i32> noundef %LocalInvocationId) #0 !dbg !14 {
+ %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i64 0, !dbg !28
+ %WorkgroupId.i0 = extractelement <3 x i32> %WorkgroupId, i64 0, !dbg !28
+ %1 = call i64 @llvm.amdgcn.s.getpc(), !dbg !28
+ %2 = shl i32 %WorkgroupId.i0, 6, !dbg !28
+ %3 = add i32 %LocalInvocationId.i0, %2, !dbg !28
+ #dbg_value(i32 %3, !29, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !28)
+ %4 = and i64 %1, -4294967296, !dbg !30
+ %5 = zext i32 %userdata4 to i64, !dbg !30
+ %6 = or disjoint i64 %4, %5, !dbg !30
+ %7 = inttoptr i64 %6 to ptr addrspace(4), !dbg !30, !amdgpu.uniform !2
+ %8 = load <4 x i32>, ptr addrspace(4) %7, align 4, !dbg !30, !invariant.load !2
+ %9 = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %8, i32 %3, i32 0, i32 0, i32 0), !dbg !30
+ #dbg_value(%dx.types.ResRet.f32 poison, !31, !DIExpression(), !32)
+ %10 = fmul reassoc arcp contract afn float %9, 2.000000e+00, !dbg !33
+ #dbg_value(float %10, !34, !DIExpression(), !35)
+ %11 = getelementptr i8, ptr addrspace(4) %7, i64 32, !dbg !36, !amdgpu.uniform !2
+ %.upto01 = insertelement <4 x float> poison, float %10, i64 0, !dbg !36
+ %12 = shufflevector <4 x float> %.upto01, <4 x float> poison, <4 x i32> zeroinitializer, !dbg !36
+ %13 = load <4 x i32>, ptr addrspace(4) %11, align 4, !dbg !36, !invariant.load !2
+ call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %12, <4 x i32> %13, i32 %3, i32 0, i32 0, i32 0), !dbg !36
+ ret void, !dbg !37
+ }
+
+ declare noundef i64 @llvm.amdgcn.s.getpc() #1
+ declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #3
+ declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #4
+
+ attributes #0 = { memory(readwrite) "amdgpu-flat-work-group-size"="64,64" "amdgpu-memory-bound"="false" "amdgpu-num-sgpr"="4294967295" "amdgpu-num-vgpr"="4294967295" "amdgpu-prealloc-sgpr-spill-vgprs" "amdgpu-unroll-threshold"="1200" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="3" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "target-cpu"="gfx1030" "target-features"=",+wavefrontsize64,+cumode,+enable-flat-scratch" }
+ attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" }
+ attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx1030" }
+ attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) "target-cpu"="gfx1030" }
+ attributes #4 = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx1030" }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!12, !13}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "dxcoob 1.7.2308.16 (52da17e29)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3)
+ !1 = !DIFile(filename: "tests\\basic_var.hlsl", directory: "")
+ !2 = !{}
+ !3 = !{!4, !10}
+ !4 = distinct !DIGlobalVariableExpression(var: !5, expr: !DIExpression())
+ !5 = !DIGlobalVariable(name: "u0", linkageName: "\01?u0@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 2, type: !6, isLocal: false, isDefinition: true)
+ !6 = !DICompositeType(tag: DW_TAG_class_type, name: "RWBuffer<float>", file: !1, line: 2, size: 32, align: 32, elements: !2, templateParams: !7)
+ !7 = !{!8}
+ !8 = !DITemplateTypeParameter(name: "element", type: !9)
+ !9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+ !10 = distinct !DIGlobalVariableExpression(var: !11, expr: !DIExpression())
+ !11 = !DIGlobalVariable(name: "u1", linkageName: "\01?u1@@3V?$RWBuffer@M@@A", scope: !0, file: !1, line: 3, type: !6, isLocal: false, isDefinition: true)
+ !12 = !{i32 2, !"Dwarf Version", i32 5}
+ !13 = !{i32 2, !"Debug Info Version", i32 3}
+ !14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !15, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+ !15 = !DISubroutineType(types: !16)
+ !16 = !{null, !17}
+ !17 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint3", file: !1, baseType: !18)
+ !18 = !DICompositeType(tag: DW_TAG_class_type, name: "vector<unsigned int, 3>", file: !1, size: 96, align: 32, elements: !19, templateParams: !24)
+ !19 = !{!20, !22, !23}
+ !20 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !18, file: !1, baseType: !21, size: 32, align: 32, flags: DIFlagPublic)
+ !21 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+ !22 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 32, flags: DIFlagPublic)
+ !23 = !DIDerivedType(tag: DW_TAG_member, name: "z", scope: !18, file: !1, baseType: !21, size: 32, align: 32, offset: 64, flags: DIFlagPublic)
+ !24 = !{!25, !26}
+ !25 = !DITemplateTypeParameter(name: "element", type: !21)
+ !26 = !DITemplateValueParameter(name: "element_count", type: !27, value: i32 3)
+ !27 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !28 = !DILocation(line: 7, column: 17, scope: !14)
+ !29 = !DILocalVariable(name: "dtid", arg: 1, scope: !14, file: !1, line: 7, type: !17)
+ !30 = !DILocation(line: 11, column: 18, scope: !14)
+ !31 = !DILocalVariable(name: "my_var", scope: !14, file: !1, line: 11, type: !9)
+ !32 = !DILocation(line: 11, column: 9, scope: !14)
+ !33 = !DILocation(line: 14, column: 26, scope: !14)
+ !34 = !DILocalVariable(name: "my_var2", scope: !14, file: !1, line: 14, type: !9)
+ !35 = !DILocation(line: 14, column: 9, scope: !14)
+ !36 = !DILocation(line: 17, column: 14, scope: !14)
+ !37 = !DILocation(line: 19, column: 1, scope: !14)
+...
+---
+name: _amdgpu_cs_main
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: true
+isSSA: false
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHContTarget: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ explicitKernArgSize: 0
+ maxKernArgAlign: 4
+ ldsSize: 0
+ gdsSize: 0
+ dynLDSAlign: 1
+ isEntryFunction: true
+ isChainFunction: false
+ noSignedZerosFPMath: false
+ memoryBound: false
+ waveLimiter: false
+ hasSpilledSGPRs: true
+ hasSpilledVGPRs: false
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ bytesInStackArgArea: 0
+ returnsVoid: true
+ argumentInfo:
+ privateSegmentWaveByteOffset: { reg: '$sgpr6' }
+ psInputAddr: 0
+ psInputEnable: 0
+ maxMemoryClusterDWords: 8
+ mode:
+ ieee: false
+ dx10-clamp: true
+ fp32-input-denormals: false
+ fp32-output-denormals: false
+ fp64-fp16-input-denormals: true
+ fp64-fp16-output-denormals: true
+ highBitsOf32BitAddress: 0
+ occupancy: 16
+ vgprForAGPRCopy: ''
+ sgprForEXECCopy: '$sgpr12_sgpr13'
+ longBranchReservedReg: ''
+ hasInitWholeWave: false
+ dynamicVGPRBlockSize: 0
+ scratchReservedForDynamicVGPRs: 0
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
+
+ %8:vgpr_32 = COPY killed $vgpr2
----------------
arsenm wrote:
Compact register numbers with -run-pass=none
https://github.com/llvm/llvm-project/pull/145125
More information about the llvm-commits mailing list