[clang] 0567f03 - [HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined symbols

Anshil Gandhi via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 18 15:53:38 PDT 2021


Author: Anshil Gandhi
Date: 2021-10-18T16:53:15-06:00
New Revision: 0567f0333176e476e15b7f32b463f58f7475ff22

URL: https://github.com/llvm/llvm-project/commit/0567f0333176e476e15b7f32b463f58f7475ff22
DIFF: https://github.com/llvm/llvm-project/commit/0567f0333176e476e15b7f32b463f58f7475ff22.diff

LOG: [HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined symbols

By default clang emits complete contructors as alias of base constructors if they are the same.
The backend is supposed to emit symbols for the alias, otherwise it causes undefined symbols.
@yaxunl observed that this issue is related to the llvm options `-amdgpu-early-inline-all=true`
and `-amdgpu-function-calls=false`. This issue is resolved by only inlining global values
with internal linkage. The `getCalleeFunction()` in AMDGPUResourceUsageAnalysis also had
to be extended to support aliases to functions. inline-calls.ll was corrected appropriately.

Reviewed By: yaxunl, #amdgpu

Differential Revision: https://reviews.llvm.org/D109707

Added: 
    clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu

Modified: 
    clang/lib/Driver/ToolChains/Clang.cpp
    llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
    llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
    llvm/test/CodeGen/AMDGPU/inline-calls.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 83afbc3952d84..316c6026adf5c 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5089,9 +5089,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   }
 
   // Enable -mconstructor-aliases except on darwin, where we have to work around
-  // a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
-  // where aliases aren't supported.
-  if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
+  // a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
+  // aliases aren't supported.
+  if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
     CmdArgs.push_back("-mconstructor-aliases");
 
   // Darwin's kernel doesn't support guard variables; just die if we

diff  --git a/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
new file mode 100644
index 0000000000000..ec7b7c3b7ff4c
--- /dev/null
+++ b/clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target, clang-driver
+
+// RUN: %clang -target x86_64-unknown-linux-gnu --offload-arch=gfx906 --cuda-device-only -nogpulib -nogpuinc -x hip -emit-llvm -S -o - %s \
+// RUN:   -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
+// RUN:   FileCheck %s
+
+#include "Inputs/cuda.h"
+
+// CHECK: %struct.B = type { i8 }
+struct B {
+
+  // CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
+  __device__ B(int x);
+};
+
+__device__ B::B(int x) {
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 7ff24d1e9c62b..2e24e9f929d2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUTargetMachine.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
@@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
 
   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
   SmallPtrSet<Function *, 8> FuncsToNoInline;
+  Triple TT(M.getTargetTriple());
 
   for (GlobalAlias &A : M.aliases()) {
     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+      if (TT.getArch() == Triple::amdgcn &&
+          A.getLinkage() != GlobalValue::InternalLinkage)
+        continue;
       A.replaceAllUsesWith(F);
       AliasesToRemove.push_back(&A);
     }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index e841e939ef34b..3c5cb6e190850 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -29,6 +29,8 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
@@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
     assert(Op.getImm() == 0);
     return nullptr;
   }
-
+  if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
+    return cast<Function>(GA->getOperand(0));
   return cast<Function>(Op.getGlobal());
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/inline-calls.ll b/llvm/test/CodeGen/AMDGPU/inline-calls.ll
index 233485a202057..134cd301b9743 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-calls.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck  %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck  %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tahiti -verify-machineinstrs < %s | FileCheck  %s
+; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tonga -verify-machineinstrs < %s | FileCheck  %s
+; RUN: llc -mtriple r600-unknown-linux-gnu -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefix=R600
 
 ; ALL-NOT: {{^}}func:
 define internal i32 @func(i32 %a) {
@@ -9,7 +9,7 @@ entry:
   ret i32 %tmp0
 }
 
-; ALL: {{^}}kernel:
+; CHECK: {{^}}kernel:
 ; GCN-NOT: s_swappc_b64
 define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
 entry:
@@ -18,12 +18,13 @@ entry:
   ret void
 }
 
-; CHECK-NOT: func_alias
-; ALL-NOT: func_alias
+; CHECK: func_alias
+; R600-NOT: func_alias
 @func_alias = alias i32 (i32), i32 (i32)* @func
 
-; ALL: {{^}}kernel3:
+; CHECK-NOT: {{^}}kernel3:
 ; GCN-NOT: s_swappc_b64
+; R600: {{^}}kernel3:
 define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = call i32 @func_alias(i32 1)


        


More information about the cfe-commits mailing list