[clang] 1bae02b - [Cuda] Use fallback method to mangle externalized decls if no CUID given

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Thu May 26 06:18:36 PDT 2022


Author: Joseph Huber
Date: 2022-05-26T09:18:22-04:00
New Revision: 1bae02b77335eb1a01d9a0bb36c2b2a29dfdd5d9

URL: https://github.com/llvm/llvm-project/commit/1bae02b77335eb1a01d9a0bb36c2b2a29dfdd5d9
DIFF: https://github.com/llvm/llvm-project/commit/1bae02b77335eb1a01d9a0bb36c2b2a29dfdd5d9.diff

LOG: [Cuda] Use fallback method to mangle externalized decls if no CUID given

CUDA requires that static variables be visible to the host when
offloading. However, The standard semantics of a stiatc variable dictate
that it should not be visible outside of the current file. In order to
access it from the host we need to perform "externalization" on the
static variable on the device. This requires generating a semi-unique
name that can be affixed to the variable as to not cause linker errors.

This is currently done using the CUID functionality, an MD5 hash value
set up by the clang driver. This allows us to achieve is mostly unique
ID that is unique even between multiple compilations of the same file.
However, this is not always availible. Instead, this patch uses the
unique ID from the file to generate a unique symbol name. This will
create a unique name that is consistent between the host and device side
compilations without requiring the CUID to be entered by the driver. The
one downside to this is that we are no longer stable under multiple
compilations of the same file. However, this is a very niche use-case
and is not supported by Nvidia's CUDA compiler so it likely to be good
enough.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D125904

Added: 
    

Modified: 
    clang/lib/CodeGen/CGCUDANV.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/CodeGen/CodeGenModule.h
    clang/test/CodeGenCUDA/device-fun-linkage.cu
    clang/test/CodeGenCUDA/static-device-var-rdc.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 10f8bd222b7e7..6f2679cb15e4c 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -285,8 +285,7 @@ std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
 
   // Make unique name for device side static file-scope variable for HIP.
   if (CGM.getContext().shouldExternalize(ND) &&
-      CGM.getLangOpts().GPURelocatableDeviceCode &&
-      !CGM.getLangOpts().CUID.empty()) {
+      CGM.getLangOpts().GPURelocatableDeviceCode) {
     SmallString<256> Buffer;
     llvm::raw_svector_ostream Out(Buffer);
     Out << DeviceSideName;

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 703cf4edf5f56..a035e5ddd9e6d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1416,8 +1416,9 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
   // Make unique name for device side static file-scope variable for HIP.
   if (CGM.getContext().shouldExternalize(ND) &&
       CGM.getLangOpts().GPURelocatableDeviceCode &&
-      CGM.getLangOpts().CUDAIsDevice && !CGM.getLangOpts().CUID.empty())
+      CGM.getLangOpts().CUDAIsDevice)
     CGM.printPostfixForExternalizedDecl(Out, ND);
+
   return std::string(Out.str());
 }
 
@@ -6825,12 +6826,38 @@ bool CodeGenModule::stopAutoInit() {
 
 void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
                                                     const Decl *D) const {
-  StringRef Tag;
   // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers
   // postfix beginning with '.' since the symbol name can be demangled.
   if (LangOpts.HIP)
-    Tag = (isa<VarDecl>(D) ? ".static." : ".intern.");
+    OS << (isa<VarDecl>(D) ? ".static." : ".intern.");
   else
-    Tag = (isa<VarDecl>(D) ? "__static__" : "__intern__");
-  OS << Tag << getContext().getCUIDHash();
+    OS << (isa<VarDecl>(D) ? "__static__" : "__intern__");
+
+  // If the CUID is not specified we try to generate a unique postfix.
+  if (getLangOpts().CUID.empty()) {
+    SourceManager &SM = getContext().getSourceManager();
+    PresumedLoc PLoc = SM.getPresumedLoc(D->getLocation());
+    assert(PLoc.isValid() && "Source location is expected to be valid.");
+
+    // Get the hash of the user defined macros.
+    llvm::MD5 Hash;
+    llvm::MD5::MD5Result Result;
+    for (const auto &Arg : PreprocessorOpts.Macros)
+      Hash.update(Arg.first);
+    Hash.final(Result);
+
+    // Get the UniqueID for the file containing the decl.
+    llvm::sys::fs::UniqueID ID;
+    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
+      PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false);
+      assert(PLoc.isValid() && "Source location is expected to be valid.");
+      if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+        SM.getDiagnostics().Report(diag::err_cannot_open_file)
+            << PLoc.getFilename() << EC.message();
+    }
+    OS << llvm::format("%x", ID.getFile()) << llvm::format("%x", ID.getDevice())
+       << "_" << llvm::utohexstr(Result.low(), /*LowerCase=*/true, /*Width=*/8);
+  } else {
+    OS << getContext().getCUIDHash();
+  }
 }

diff  --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 8393d43682ea5..a5ec4c8f988d6 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1467,7 +1467,10 @@ class CodeGenModule : public CodeGenTypeCache {
   bool stopAutoInit();
 
   /// Print the postfix for externalized static variable or kernels for single
-  /// source offloading languages CUDA and HIP.
+  /// source offloading languages CUDA and HIP. The unique postfix is created
+  /// using either the CUID argument, or the file's UniqueID and active macros.
+  /// The fallback method without a CUID requires that the offloading toolchain
+  /// does not define separate macros via the -cc1 options.
   void printPostfixForExternalizedDecl(llvm::raw_ostream &OS,
                                        const Decl *D) const;
 

diff  --git a/clang/test/CodeGenCUDA/device-fun-linkage.cu b/clang/test/CodeGenCUDA/device-fun-linkage.cu
index d8ad6d438be9c..54899e0e9c0f1 100644
--- a/clang/test/CodeGenCUDA/device-fun-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-fun-linkage.cu
@@ -23,10 +23,10 @@ template __global__ void kernel<int>();
 // Ensure that unused static device function is eliminated
 static __device__ void static_func() {}
 // NORDC-NEG-NOT: define{{.*}} void @_ZL13static_funcv()
-// RDC-NEG-NOT:   define{{.*}} void @_ZL13static_funcv()
+// RDC-NEG-NOT:   define{{.*}} void @_ZL13static_funcv[[FILEID:.*]]()
 
 // Ensure that kernel function has external or weak_odr
 // linkage regardless static specifier
 static __global__ void static_kernel() {}
 // NORDC:     define void @_ZL13static_kernelv()
-// RDC:       define weak_odr void @_ZL13static_kernelv()
+// RDC:       define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]()

diff  --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
index 81a2bd0fd455c..a9cca4e9212ab 100644
--- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,12 +2,12 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=DEV,INT-DEV %s
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o %t.nocuid.dev -x hip %s
+// RUN: cat %t.nocuid.dev | FileCheck -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-gnu-linux \
-// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
-// RUN:   -check-prefixes=HOST,INT-HOST %s
+// RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o %t.nocuid.host -x hip %s
+// RUN: cat %t.nocuid.host | FileCheck -check-prefixes=HOST,INT-HOST %s
 
 // RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
 // RUN:   -std=c++11 -fgpu-rdc -emit-llvm -o - -x hip %s > %t.dev
@@ -21,6 +21,7 @@
 // variable names.
 
 // RUN: cat %t.dev %t.host | FileCheck -check-prefix=POSTFIX %s
+// RUN: cat %t.nocuid.dev %t.nocuid.host | FileCheck -check-prefix=POSTFIX-ID %s
 
 // Negative tests.
 
@@ -48,6 +49,9 @@
 
 #include "Inputs/cuda.h"
 
+// Make sure we can still mangle with a line directive.
+#line 0 "-"
+
 // Test function scope static device variable, which should not be externalized.
 // DEV-DAG: @_ZZ6kernelPiPPKiE1w = internal addrspace(4) constant i32 1
 
@@ -56,8 +60,8 @@
 // HOST-DAG: @_ZL1y = internal global i32 undef
 
 // Test normal static device variables
-// INT-DEV-DAG: @_ZL1x = addrspace(1) externally_initialized global i32 0
-// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x\00"
+// INT-DEV-DAG: @_ZL1x[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x[[FILEID:.*]]\00"
 
 // Test externalized static device variables
 // EXT-DEV-DAG: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
@@ -66,6 +70,8 @@
 
 // POSTFIX: @_ZL1x.static.[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 // POSTFIX: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[HASH]]\00"
+// POSTFIX-ID: @_ZL1x.static.[[FILEID:.*]] = addrspace(1) externally_initialized global i32 0
+// POSTFIX-ID: @[[DEVNAMEX:[0-9]+]] = {{.*}}c"_ZL1x.static.[[FILEID]]\00"
 
 static __device__ int x;
 
@@ -75,8 +81,8 @@ static __device__ int x;
 static __device__ int x2;
 
 // Test normal static device variables
-// INT-DEV-DAG: @_ZL1y = addrspace(4) externally_initialized global i32 0
-// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
+// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0
+// INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00"
 
 // Test externalized static device variables
 // EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0


        


More information about the cfe-commits mailing list