[llvm] [compiler-rt] [clang] [clang-tools-extra] [AMDGPU] Avoid hitting AMDGPUAsmPrinter related asserts for local functions at O0 (PR #72129)

Janek van Oirschot via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 17 08:51:42 PST 2023


https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/72129

>From 04b0856ae59ff367fd1b6736d49430648976cb25 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Mon, 13 Nov 2023 07:38:15 -0800
Subject: [PATCH 1/3] [AMDGPU] Avoid hitting AMDGPUAsmPrinter related asserts
 for local functions at O0.

Local functions will be ignored for (codegen) CGSCC order passes.
However, they may still be referenced. This patch allows such local
functions to exist in AMDGPUResourceUsageAnalysis and bypasses
referencing the local function's MachineFunction in AMDGPUAsmPrinter's
code emit by reinserting the CGSCC pass manager through DummyCGSCCPass.
---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |  8 ++
 .../AMDGPU/AMDGPUResourceUsageAnalysis.cpp    | 13 ++-
 .../CodeGen/AMDGPU/insert-delay-alu-bug.ll    | 24 ++---
 llvm/test/CodeGen/AMDGPU/ipra.ll              | 14 +--
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll      | 65 ++++++++-----
 .../AMDGPU/lower-module-lds-offsets.ll        | 26 ++---
 .../AMDGPU/module-lds-false-sharing.ll        | 97 +++++++++----------
 .../AMDGPU/resource-usage-dead-function.ll    |  2 +-
 8 files changed, 139 insertions(+), 110 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index eb30f31af6d6b68..67cc858a182c1e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1238,6 +1238,14 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
 void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<AMDGPUResourceUsageAnalysis>();
   AU.addPreserved<AMDGPUResourceUsageAnalysis>();
+
+  // The Dummy pass is necessary because AMDGPUResourceUsageAnalysis will pop
+  // the CGSCC pass manager off of the active pass managers stack. Adding the
+  // Dummy pass will re-insert the CGSCC pass manager into said stack again
+  // through CallGraphSCCPass::assignPassManager.
+  AU.addRequired<DummyCGSCCPass>();
+  AU.addPreserved<DummyCGSCCPass>();
+
   AsmPrinter::getAnalysisUsage(AU);
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index db5d2bbcf5bbc71..25bdeff7ff6742c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -151,9 +151,16 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
 
     SIFunctionResourceInfo &Info = CI.first->second;
     MachineFunction *MF = MMI.getMachineFunction(*F);
-    assert(MF && "function must have been generated already");
-    Info = analyzeResourceUsage(*MF, TM);
-    HasIndirectCall |= Info.HasIndirectCall;
+    // We can only analyze resource usage of functions for which there exists a
+    // machinefunction equivalent. These may not exist as the (codegen) passes
+    // prior to this one are run in CGSCC order which will bypass any local
+    // functions that aren't called.
+    assert((MF || !TPC->requiresCodeGenSCCOrder()) &&
+            "function must have been generated already");
+    if (MF) {
+      Info = analyzeResourceUsage(*MF, TM);
+      HasIndirectCall |= Info.HasIndirectCall;
+    }
   }
 
   if (HasIndirectCall)
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index 220ea962b9e1dca..83b02e5b47f0e7e 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -3,6 +3,18 @@
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 
+define <2 x i64> @f1() #0 {
+; GFX11-LABEL: f1:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    v_mov_b32_e32 v3, 0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  ret <2 x i64> zeroinitializer
+}
+
 define void @f0() {
 ; GFX11-LABEL: f0:
 ; GFX11:       ; %bb.0: ; %bb
@@ -36,18 +48,6 @@ bb:
   ret void
 }
 
-define <2 x i64> @f1() #0 {
-; GFX11-LABEL: f1:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-  ret <2 x i64> zeroinitializer
-}
-
 ; FIXME: This generates "instid1(/* invalid instid value */)".
 define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
 ; GFX11-LABEL: f2:
diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll
index 07693371278701c..766d8352b26f195 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra.ll
@@ -105,13 +105,6 @@ define void @test_funcx2() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}wombat:
-define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
-bb:
-  call void @hoge() #0
-  ret void
-}
-
 ; Make sure we save/restore the return address around the call.
 ; Function Attrs: norecurse
 define internal void @hoge() #2 {
@@ -128,6 +121,13 @@ bb:
   ret void
 }
 
+; GCN-LABEL: {{^}}wombat:
+define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
+bb:
+  call void @hoge() #0
+  ret void
+}
+
 declare dso_local void @eggs()
 
 
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 7abb789019f1f0c..55242bf353b82a2 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -142,11 +142,14 @@
 ; GCN-O0-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O0-NEXT:        Stack Frame Layout Analysis
 ; GCN-O0-NEXT:    Function register usage analysis
-; GCN-O0-NEXT:    FunctionPass Manager
-; GCN-O0-NEXT:      Lazy Machine Block Frequency Analysis
-; GCN-O0-NEXT:      Machine Optimization Remark Emitter
-; GCN-O0-NEXT:      AMDGPU Assembly Printer
-; GCN-O0-NEXT:      Free MachineFunction
+; GCN-O0-NEXT:    CallGraph Construction
+; GCN-O0-NEXT:    Call Graph SCC Pass Manager
+; GCN-O0-NEXT:      DummyCGSCCPass
+; GCN-O0-NEXT:      FunctionPass Manager
+; GCN-O0-NEXT:        Lazy Machine Block Frequency Analysis
+; GCN-O0-NEXT:        Machine Optimization Remark Emitter
+; GCN-O0-NEXT:        AMDGPU Assembly Printer
+; GCN-O0-NEXT:        Free MachineFunction
 
 ; GCN-O1:Target Library Information
 ; GCN-O1-NEXT:Target Pass Configuration
@@ -409,11 +412,14 @@
 ; GCN-O1-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O1-NEXT:        Stack Frame Layout Analysis
 ; GCN-O1-NEXT:    Function register usage analysis
-; GCN-O1-NEXT:    FunctionPass Manager
-; GCN-O1-NEXT:      Lazy Machine Block Frequency Analysis
-; GCN-O1-NEXT:      Machine Optimization Remark Emitter
-; GCN-O1-NEXT:      AMDGPU Assembly Printer
-; GCN-O1-NEXT:      Free MachineFunction
+; GCN-O1-NEXT:    CallGraph Construction
+; GCN-O1-NEXT:    Call Graph SCC Pass Manager
+; GCN-O1-NEXT:      DummyCGSCCPass
+; GCN-O1-NEXT:      FunctionPass Manager
+; GCN-O1-NEXT:        Lazy Machine Block Frequency Analysis
+; GCN-O1-NEXT:        Machine Optimization Remark Emitter
+; GCN-O1-NEXT:        AMDGPU Assembly Printer
+; GCN-O1-NEXT:        Free MachineFunction
 
 ; GCN-O1-OPTS:Target Library Information
 ; GCN-O1-OPTS-NEXT:Target Pass Configuration
@@ -698,11 +704,14 @@
 ; GCN-O1-OPTS-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O1-OPTS-NEXT:        Stack Frame Layout Analysis
 ; GCN-O1-OPTS-NEXT:    Function register usage analysis
-; GCN-O1-OPTS-NEXT:    FunctionPass Manager
-; GCN-O1-OPTS-NEXT:      Lazy Machine Block Frequency Analysis
-; GCN-O1-OPTS-NEXT:      Machine Optimization Remark Emitter
-; GCN-O1-OPTS-NEXT:      AMDGPU Assembly Printer
-; GCN-O1-OPTS-NEXT:      Free MachineFunction
+; GCN-O1-OPTS-NEXT:    CallGraph Construction
+; GCN-O1-OPTS-NEXT:    Call Graph SCC Pass Manager
+; GCN-O1-OPTS-NEXT:      DummyCGSCCPass
+; GCN-O1-OPTS-NEXT:      FunctionPass Manager
+; GCN-O1-OPTS-NEXT:        Lazy Machine Block Frequency Analysis
+; GCN-O1-OPTS-NEXT:        Machine Optimization Remark Emitter
+; GCN-O1-OPTS-NEXT:        AMDGPU Assembly Printer
+; GCN-O1-OPTS-NEXT:        Free MachineFunction
 
 ; GCN-O2:Target Library Information
 ; GCN-O2-NEXT:Target Pass Configuration
@@ -999,11 +1008,14 @@
 ; GCN-O2-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O2-NEXT:        Stack Frame Layout Analysis
 ; GCN-O2-NEXT:    Function register usage analysis
-; GCN-O2-NEXT:    FunctionPass Manager
-; GCN-O2-NEXT:      Lazy Machine Block Frequency Analysis
-; GCN-O2-NEXT:      Machine Optimization Remark Emitter
-; GCN-O2-NEXT:      AMDGPU Assembly Printer
-; GCN-O2-NEXT:      Free MachineFunction
+; GCN-O2-NEXT:    CallGraph Construction
+; GCN-O2-NEXT:    Call Graph SCC Pass Manager
+; GCN-O2-NEXT:      DummyCGSCCPass
+; GCN-O2-NEXT:      FunctionPass Manager
+; GCN-O2-NEXT:        Lazy Machine Block Frequency Analysis
+; GCN-O2-NEXT:        Machine Optimization Remark Emitter
+; GCN-O2-NEXT:        AMDGPU Assembly Printer
+; GCN-O2-NEXT:        Free MachineFunction
 
 ; GCN-O3:Target Library Information
 ; GCN-O3-NEXT:Target Pass Configuration
@@ -1312,11 +1324,14 @@
 ; GCN-O3-NEXT:        Machine Optimization Remark Emitter
 ; GCN-O3-NEXT:        Stack Frame Layout Analysis
 ; GCN-O3-NEXT:    Function register usage analysis
-; GCN-O3-NEXT:    FunctionPass Manager
-; GCN-O3-NEXT:      Lazy Machine Block Frequency Analysis
-; GCN-O3-NEXT:      Machine Optimization Remark Emitter
-; GCN-O3-NEXT:      AMDGPU Assembly Printer
-; GCN-O3-NEXT:      Free MachineFunction
+; GCN-O3-NEXT:    CallGraph Construction
+; GCN-O3-NEXT:    Call Graph SCC Pass Manager
+; GCN-O3-NEXT:      DummyCGSCCPass
+; GCN-O3-NEXT:      FunctionPass Manager
+; GCN-O3-NEXT:        Lazy Machine Block Frequency Analysis
+; GCN-O3-NEXT:        Machine Optimization Remark Emitter
+; GCN-O3-NEXT:        AMDGPU Assembly Printer
+; GCN-O3-NEXT:        Free MachineFunction
 
 define void @empty() {
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
index c4449756949e521..44eff0d67399014 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-offsets.ll
@@ -9,6 +9,19 @@
 @lds.size.1.align.1 = internal unnamed_addr addrspace(3) global [1 x i8] undef, align 1
 @lds.size.16.align.16 = internal unnamed_addr addrspace(3) global [16 x i8] undef, align 16
 
+; GCN-LABEL: {{^}}f0:
+; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
+; GCN:     ds_write_b8 [[NULL]], [[TREE]]
+define void @f0() {
+; OPT-LABEL: @f0(
+; OPT-NEXT:    store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
+; OPT-NEXT:    ret void
+;
+  store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
+  ret void
+}
+
 ; GCN-LABEL: {{^}}k0:
 ; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
 ; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
@@ -29,16 +42,3 @@ define amdgpu_kernel void @k0() {
   call void @f0()
   ret void
 }
-
-; GCN-LABEL: {{^}}f0:
-; GCN-DAG: v_mov_b32_e32 [[NULL:v[0-9]+]], 0
-; GCN-DAG: v_mov_b32_e32 [[TREE:v[0-9]+]], 3
-; GCN:     ds_write_b8 [[NULL]], [[TREE]]
-define void @f0() {
-; OPT-LABEL: @f0() {
-; OPT-NEXT:    store i8 3, ptr addrspace(3) @llvm.amdgcn.module.lds, align 1
-; OPT-NEXT:    ret void
-;
-  store i8 3, ptr addrspace(3) @lds.size.1.align.1, align 1
-  ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
index e557e0ce9b1be5e..7458fccb9e9f68a 100644
--- a/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
+++ b/llvm/test/CodeGen/AMDGPU/module-lds-false-sharing.ll
@@ -24,6 +24,54 @@ store i32 0, ptr addrspace(3) @used_by_kernel
 }
 ; CHECK: ; LDSByteSize: 4 bytes
 
+define void @nonkernel() {
+; GFX9-LABEL: nonkernel:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    v_mov_b32_e32 v1, v0
+; GFX9-NEXT:    ds_write_b32 v0, v0 offset:8
+; GFX9-NEXT:    ds_write_b64 v0, v[0:1]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: nonkernel:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, v0
+; GFX10-NEXT:    ds_write_b32 v0, v0 offset:8
+; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; G_GFX9-LABEL: nonkernel:
+; G_GFX9:       ; %bb.0:
+; G_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; G_GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; G_GFX9-NEXT:    v_mov_b32_e32 v3, 8
+; G_GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; G_GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; G_GFX9-NEXT:    ds_write_b32 v3, v2
+; G_GFX9-NEXT:    ds_write_b64 v2, v[0:1]
+; G_GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; G_GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; G_GFX10-LABEL: nonkernel:
+; G_GFX10:       ; %bb.0:
+; G_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; G_GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; G_GFX10-NEXT:    v_mov_b32_e32 v3, 8
+; G_GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
+; G_GFX10-NEXT:    ds_write_b32 v3, v2
+; G_GFX10-NEXT:    ds_write_b64 v2, v[0:1]
+; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; G_GFX10-NEXT:    s_setpc_b64 s[30:31]
+  store i32 0, ptr addrspace(3) @used_by_both
+  store double 0.0, ptr addrspace(3) @used_by_function
+  ret void
+}
 ; Needs to allocate both variables, store to used_by_both is at sizeof(double)
 define amdgpu_kernel void @withcall() {
 ; GFX9-LABEL: withcall:
@@ -135,52 +183,3 @@ define amdgpu_kernel void @nocall_false_sharing() {
 }
 ; CHECK: ; LDSByteSize: 4 bytes
 
-
-define void @nonkernel() {
-; GFX9-LABEL: nonkernel:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9-NEXT:    v_mov_b32_e32 v1, v0
-; GFX9-NEXT:    ds_write_b32 v0, v0 offset:8
-; GFX9-NEXT:    ds_write_b64 v0, v[0:1]
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: nonkernel:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    v_mov_b32_e32 v1, v0
-; GFX10-NEXT:    ds_write_b32 v0, v0 offset:8
-; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; G_GFX9-LABEL: nonkernel:
-; G_GFX9:       ; %bb.0:
-; G_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; G_GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; G_GFX9-NEXT:    v_mov_b32_e32 v3, 8
-; G_GFX9-NEXT:    v_mov_b32_e32 v0, 0
-; G_GFX9-NEXT:    v_mov_b32_e32 v1, 0
-; G_GFX9-NEXT:    ds_write_b32 v3, v2
-; G_GFX9-NEXT:    ds_write_b64 v2, v[0:1]
-; G_GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; G_GFX9-NEXT:    s_setpc_b64 s[30:31]
-;
-; G_GFX10-LABEL: nonkernel:
-; G_GFX10:       ; %bb.0:
-; G_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; G_GFX10-NEXT:    v_mov_b32_e32 v2, 0
-; G_GFX10-NEXT:    v_mov_b32_e32 v3, 8
-; G_GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; G_GFX10-NEXT:    ds_write_b32 v3, v2
-; G_GFX10-NEXT:    ds_write_b64 v2, v[0:1]
-; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; G_GFX10-NEXT:    s_setpc_b64 s[30:31]
-  store i32 0, ptr addrspace(3) @used_by_both
-  store double 0.0, ptr addrspace(3) @used_by_function
-  ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index c30089a8dd32a0f..b606f3c6209a37d 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -6,7 +6,7 @@
 
 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
 
-; GCN-LABEL: unreachable:
+; GCN-NOT: unreachable:
 ; Function info:
 ; codeLenInByte = 4
 define internal fastcc void @unreachable() {

>From 14901a74ecb94043bee35d612d8fee41843e1613 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Mon, 13 Nov 2023 15:42:22 -0800
Subject: [PATCH 2/3] Fix format, assert and add codegen test for uncalled
 local functions

---
 .../lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp |  4 ++--
 llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll  | 11 +++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 25bdeff7ff6742c..481a5ebe66004bc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -155,8 +155,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
     // machinefunction equivalent. These may not exist as the (codegen) passes
     // prior to this one are run in CGSCC order which will bypass any local
     // functions that aren't called.
-    assert((MF || !TPC->requiresCodeGenSCCOrder()) &&
-            "function must have been generated already");
+    assert((MF || TPC->requiresCodeGenSCCOrder()) &&
+           "function must have been generated already");
     if (MF) {
       Info = analyzeResourceUsage(*MF, TM);
       HasIndirectCall |= Info.HasIndirectCall;
diff --git a/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll
new file mode 100644
index 000000000000000..e3eb206d8be067a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll
@@ -0,0 +1,11 @@
+; RUN: llc -O0 -march=amdgcn < %s | FileCheck %s
+
+; CHECK-NOT: foo
+define internal i32 @foo() {
+  ret i32 0
+}
+
+; CHECK-NOT: bar
+define private i32 @bar() {
+  ret i32 0
+}

>From 6b28cb2cc052870bfed7013e6e03262df36f5bd8 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Fri, 17 Nov 2023 08:50:07 -0800
Subject: [PATCH 3/3] Improve test

---
 .../AMDGPU/uncalled-local-functions.ll        | 88 +++++++++++++++++--
 1 file changed, 83 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll
index e3eb206d8be067a..a0023b70f28b094 100644
--- a/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/uncalled-local-functions.ll
@@ -1,11 +1,89 @@
-; RUN: llc -O0 -march=amdgcn < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a < %s | FileCheck %s
+; REQUIRES: asserts
 
-; CHECK-NOT: foo
-define internal i32 @foo() {
+ at alias = internal alias i32, i32* @aliased_internal_func
+ at alias_taken = internal alias i32, i32* @aliased_taken_func
+
+; CHECK-NOT: internal_func
+define internal i32 @internal_func() {
   ret i32 0
 }
 
-; CHECK-NOT: bar
-define private i32 @bar() {
+; CHECK-NOT: private_func
+define private i32 @private_func() {
+  ret i32 0
+}
+
+; CHECK-NOT: aliased_internal_func
+define internal i32 @aliased_internal_func() {
+  ret i32 0
+}
+
+; CHECK-LABEL: take_alias_addr
+; CHECK:      Function info:
+; CHECK-NEXT: codeLenInByte = 60
+; CHECK-NEXT: NumSgprs: 37
+; CHECK-NEXT: NumVgprs: 1
+; CHECK-NEXT: NumAgprs: 0
+; CHECK-NEXT: TotalNumVgprs: 1
+; CHECK-NEXT: ScratchSize: 16
+; CHECK-NEXT: MemoryBound: 0
+define void @take_alias_addr() {
+  %addr_loc = alloca ptr, addrspace(5)
+  store ptr @alias_taken, ptr addrspace(5) %addr_loc
+  ret void
+}
+
+; CHECK: aliased_taken_func
+; CHECK:      Function info:
+; CHECK-NEXT: codeLenInByte = 12
+; CHECK-NEXT: NumSgprs: 36
+; CHECK-NEXT: NumVgprs: 1
+; CHECK-NEXT: NumAgprs: 0
+; CHECK-NEXT: TotalNumVgprs: 1
+; CHECK-NEXT: ScratchSize: 0
+; CHECK-NEXT: MemoryBound: 0
+define internal i32 @aliased_taken_func() {
   ret i32 0
 }
+
+; CHECK-LABEL: addr_taken
+; CHECK:      Function info:
+; CHECK-NEXT: codeLenInByte = 12
+; CHECK-NEXT: NumSgprs: 36
+; CHECK-NEXT: NumVgprs: 1
+; CHECK-NEXT: NumAgprs: 0
+; CHECK-NEXT: TotalNumVgprs: 1
+; CHECK-NEXT: ScratchSize: 0
+; CHECK-NEXT: MemoryBound: 0
+define internal i32 @addr_taken() {
+  ret i32 0
+}
+
+; CHECK-LABEL: non_local
+; CHECK:      Function info:
+; CHECK-NEXT: codeLenInByte = 12
+; CHECK-NEXT: NumSgprs: 36
+; CHECK-NEXT: NumVgprs: 1
+; CHECK-NEXT: NumAgprs: 0
+; CHECK-NEXT: TotalNumVgprs: 1
+; CHECK-NEXT: ScratchSize: 0
+; CHECK-NEXT: MemoryBound: 0
+define i32 @non_local() {
+  ret i32 0
+}
+
+; CHECK-LABEL: take_addr
+; CHECK:      Function info:
+; CHECK-NEXT: codeLenInByte = 60
+; CHECK-NEXT: NumSgprs: 37
+; CHECK-NEXT: NumVgprs: 1
+; CHECK-NEXT: NumAgprs: 0
+; CHECK-NEXT: TotalNumVgprs: 1
+; CHECK-NEXT: ScratchSize: 16
+; CHECK-NEXT: MemoryBound: 0
+define void @take_addr() {
+  %addr_loc = alloca ptr, addrspace(5)
+  store ptr @addr_taken, ptr addrspace(5) %addr_loc
+  ret void
+}



More information about the cfe-commits mailing list