[llvm] [AMDGPU] Fix resource analysis crash on alias-to-alias function (PR #99034)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 16 06:17:04 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/99034

>From 9d89054b7f06cb5914d6baecd518d3ae5ac9a6c1 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 16 Jul 2024 08:08:53 -0500
Subject: [PATCH] [AMDGPU] Fix resource analysis crash on alias-to-alias
 function

Summary:
Previously this code only looked through a single level of aliases to
find the underlying function. This patch changes it to keep following
the alias chain until it reaches a global that is not an alias. Aliases
that form a cycle are illegal IR, so we shouldn't need to worry about
infinite loops.

Fixes https://github.com/llvm/llvm-project/issues/96812
---
 .../AMDGPU/AMDGPUResourceUsageAnalysis.cpp    |  7 ++-
 llvm/test/CodeGen/AMDGPU/global-alias.ll      | 56 +++++++++++++++++++
 2 files changed, 60 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/global-alias.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 2fe9cd242ff19..3bf72d1a5d40a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -65,9 +65,10 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
     assert(Op.getImm() == 0);
     return nullptr;
   }
-  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
-    return cast<Function>(GA->getOperand(0));
-  return cast<Function>(Op.getGlobal());
+  const GlobalValue *GV = Op.getGlobal();
+  while (auto *GA = dyn_cast<GlobalAlias>(GV))
+    GV = cast<GlobalValue>(GA->getOperand(0));
+  return cast<Function>(GV);
 }
 
 static bool hasAnyNonFlatUseOfReg(const MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/global-alias.ll b/llvm/test/CodeGen/AMDGPU/global-alias.ll
new file mode 100644
index 0000000000000..5c1c4977cabb0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/global-alias.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -verify-machineinstrs %s -o - | FileCheck %s
+
+@foo_a = alias void (ptr), ptr @foo
+@bar_a = alias void (ptr), ptr @foo_a
+
+define void @foo() {
+; CHECK-LABEL: foo:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  ret void
+}
+
+define void @bar() {
+; CHECK-LABEL: bar:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s16, s33
+; CHECK-NEXT:    s_mov_b32 s33, s32
+; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
+; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
+; CHECK-NEXT:    s_waitcnt expcnt(0)
+; CHECK-NEXT:    v_writelane_b32 v40, s16, 2
+; CHECK-NEXT:    s_addk_i32 s32, 0x400
+; CHECK-NEXT:    v_writelane_b32 v40, s30, 0
+; CHECK-NEXT:    v_writelane_b32 v40, s31, 1
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, bar_a@gotpcrel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, bar_a@gotpcrel32@hi+12
+; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT:    v_readlane_b32 s31, v40, 1
+; CHECK-NEXT:    v_readlane_b32 s30, v40, 0
+; CHECK-NEXT:    v_readlane_b32 s4, v40, 2
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
+; CHECK-NEXT:    s_addk_i32 s32, 0xfc00
+; CHECK-NEXT:    s_mov_b32 s33, s4
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  call void @bar_a(ptr null)
+  ret void
+}
+
+; UTC_ARGS: --disable
+; CHECK: .set foo_a, foo
+; CHECK: .set bar_a, foo_a
+; UTC_ARGS: --enable



More information about the llvm-commits mailing list