[llvm] 170de19 - [AMDGPU] Latency calculation must be independent of meta insts (#177052)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 21 10:20:00 PST 2026


Author: LU-JOHN
Date: 2026-01-21T12:19:55-06:00
New Revision: 170de19a5b2dc2b9855d73794f139d1ff84bce47

URL: https://github.com/llvm/llvm-project/commit/170de19a5b2dc2b9855d73794f139d1ff84bce47
DIFF: https://github.com/llvm/llvm-project/commit/170de19a5b2dc2b9855d73794f139d1ff84bce47.diff

LOG: [AMDGPU] Latency calculation must be independent of meta insts (#177052)

Debug and other meta instructions in bundles must not affect the latency
calculation in adjustSchedDependency, so that code compiled with and
without debug instructions is identical.

---------

Signed-off-by: John Lu <John.Lu at amd.com>

Added: 
    llvm/test/CodeGen/AMDGPU/debug-independence-adjustSchedDependency.ll

Modified: 
    llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
    llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index c8bbcbbd76928..0488968a1a2ec 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -644,6 +644,8 @@ void GCNSubtarget::adjustSchedDependency(
     MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end());
     unsigned Lat = 0;
     for (++I; I != E && I->isBundledWithPred(); ++I) {
+      if (I->isMetaInstruction())
+        continue;
       if (I->modifiesRegister(Reg, TRI))
         Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
       else if (Lat)
@@ -657,6 +659,8 @@ void GCNSubtarget::adjustSchedDependency(
     MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end());
     unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI);
     for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
+      if (I->isMetaInstruction())
+        continue;
       if (I->readsRegister(Reg, TRI))
         break;
       --Lat;

diff  --git a/llvm/test/CodeGen/AMDGPU/debug-independence-adjustSchedDependency.ll b/llvm/test/CodeGen/AMDGPU/debug-independence-adjustSchedDependency.ll
new file mode 100644
index 0000000000000..0ea51b2b3e212
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/debug-independence-adjustSchedDependency.ll
@@ -0,0 +1,44 @@
+; RUN: opt %s  -strip-debug -o %t.no_debug.ll -S
+; RUN: llc -mcpu=gfx1250 < %s             -filetype=obj -o %t.with_debug.o
+; RUN: llc -mcpu=gfx1250 < %t.no_debug.ll -filetype=obj -o %t.no_debug.o
+; RUN: llvm-strip %t.with_debug.o %t.no_debug.o
+; RUN: cmp %t.with_debug.o %t.no_debug.o
+; Ensure that compiling with and without debug generates identical code.
+; Test that adjustSchedDependency does not count debug instructions in bundles.
+
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @_test_adjustSchedDependency(ptr addrspace(1) %AA.coerce, i64 %shiftA, i32 %lda, ptr addrspace(3) %stPtr) !dbg !4 {
+entry:
+    #dbg_value(i32 0, !10, !DIExpression(), !13)
+    #dbg_value(ptr addrspace(1) %AA.coerce, !14, !DIExpression(), !13)
+  %add.ptr1.i = getelementptr float, ptr addrspace(1) %AA.coerce, i64 %shiftA
+  %mul15.13 = mul i32 %lda, 13
+  %idxprom.13 = sext i32 %mul15.13 to i64
+  %arrayidx.13 = getelementptr float, ptr addrspace(1) %add.ptr1.i, i64 %idxprom.13
+  %floatval = load float, ptr addrspace(1) %arrayidx.13, align 4
+  %floatpair = insertelement <2 x float> zeroinitializer, float %floatval, i64 0
+  store <2 x float> %floatpair, ptr addrspace(3) %stPtr, align 4
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "AMD clang version 22.0.0git (ssh://github-emu/AMD-Lightning-Internal/llvm-project  25425 c51a87b7a53a3e8f308402aaffa3ecbc2953305a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, imports: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "cc205700bf3536fe4ff21a07daf7e01d")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "test_adjustSchedDependency", linkageName: "_test_adjustSchedDependency", scope: !6, file: !5, line: 142, type: !8, scopeLine: 150, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, templateParams: !2, retainedNodes: !2)
+!5 = !DIFile(filename: "kernels.hpp", directory: "/tmp")
+!6 = !DINamespace(name: "v33200", scope: !7, exportSymbols: true)
+!7 = !DINamespace(name: "solve", scope: null)
+!8 = distinct !DISubroutineType(types: !9)
+!9 = !{null}
+!10 = !DILocalVariable(name: "m", arg: 1, scope: !4, file: !5, line: 142, type: !11)
+!11 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !12)
+!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!13 = !DILocation(line: 0, scope: !4)
+!14 = !DILocalVariable(name: "AA", arg: 2, scope: !4, file: !5, line: 143, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !16, size: 64)
+!16 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 2bcc5df70d94d..5c26ff6e916c1 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -15960,24 +15960,24 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
 ; GFX10-NEXT:    v_writelane_b32 v40, s22, 18
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s52
-; GFX10-NEXT:    v_mov_b32_e32 v1, s47
 ; GFX10-NEXT:    v_writelane_b32 v40, s23, 19
-; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
-; GFX10-NEXT:    v_mov_b32_e32 v0, s46
+; GFX10-NEXT:    v_mov_b32_e32 v1, s47
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s48
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s49
+; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
 ; GFX10-NEXT:    v_writelane_b32 v40, s24, 20
+; GFX10-NEXT:    v_mov_b32_e32 v0, s46
 ; GFX10-NEXT:    s_mov_b32 s20, s36
 ; GFX10-NEXT:    s_mov_b32 s21, s37
 ; GFX10-NEXT:    s_mov_b32 s22, s38
-; GFX10-NEXT:    s_mov_b32 s23, s39
 ; GFX10-NEXT:    v_writelane_b32 v40, s25, 21
+; GFX10-NEXT:    s_mov_b32 s23, s39
 ; GFX10-NEXT:    s_mov_b32 s24, s40
 ; GFX10-NEXT:    s_mov_b32 s25, s41
 ; GFX10-NEXT:    v_mov_b32_e32 v4, s50
-; GFX10-NEXT:    v_mov_b32_e32 v5, s51
 ; GFX10-NEXT:    v_writelane_b32 v40, s26, 22
 ; GFX10-NEXT:    s_mov_b32 s26, s42
+; GFX10-NEXT:    v_mov_b32_e32 v5, s51
 ; GFX10-NEXT:    buffer_store_dword v0, off, s[0:3], s32
 ; GFX10-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
 ; GFX10-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
@@ -16180,27 +16180,27 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s22, 18
 ; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v6, s2
-; GFX10-SCRATCH-NEXT:    s_add_i32 s2, s32, 24
-; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, s50
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s23, 19
+; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v4, s50
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v5, s51
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v0, s46
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v1, s47
-; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, s48
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s24, 20
+; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v2, s48
 ; GFX10-SCRATCH-NEXT:    v_mov_b32_e32 v3, s49
+; GFX10-SCRATCH-NEXT:    s_add_i32 s2, s32, 24
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s20, s36
+; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s25, 21
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s21, s37
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s22, s38
-; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s25, 21
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s23, s39
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s24, s40
+; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s26, 22
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s25, s41
+; GFX10-SCRATCH-NEXT:    s_mov_b32 s26, s42
 ; GFX10-SCRATCH-NEXT:    scratch_store_dword off, v6, s2
 ; GFX10-SCRATCH-NEXT:    scratch_store_dwordx2 off, v[4:5], s3
 ; GFX10-SCRATCH-NEXT:    scratch_store_dwordx4 off, v[0:3], s32
-; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s26, 22
-; GFX10-SCRATCH-NEXT:    s_mov_b32 s26, s42
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s27, 23
 ; GFX10-SCRATCH-NEXT:    s_mov_b32 s27, s43
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s28, 24


        


More information about the llvm-commits mailing list