[clang] a10eb07 - Do not append terminating NUL to the binary string with embedded fatbin.

Mon Oct 17 15:42:43 PDT 2022

Author: Artem Belevich
Date: 2022-10-17T15:39:39-07:00
New Revision: a10eb07d1acc2f132b4d0cf522097814a8340b47

URL: https://github.com/llvm/llvm-project/commit/a10eb07d1acc2f132b4d0cf522097814a8340b47
DIFF: https://github.com/llvm/llvm-project/commit/a10eb07d1acc2f132b4d0cf522097814a8340b47.diff

LOG: Do not append terminating NUL to the binary string with embedded fatbin.

Extra NUL does not impact functionality of the generated code, but it confuses
various NVIDIA tools used to examine embedded GPU binaries.

Differential Revision: https://reviews.llvm.org/D135832

Added: 
    

Modified: 
    clang/lib/CodeGen/CGCUDANV.cpp
    clang/test/CodeGenCUDA/device-stub.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index a8bb0dd65d1a..abf320996dc4 100644

--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -69,6 +69,8 @@ class CGNVCUDARuntime : public CGCUDARuntime {
   bool RelocatableDeviceCode;
   /// Mangle context for device.
   std::unique_ptr<MangleContext> DeviceMC;
+  /// Some zeros used for GEPs.
+  llvm::Constant *Zeros[2];
 
   llvm::FunctionCallee getSetupArgumentFn() const;
   llvm::FunctionCallee getLaunchFn() const;
@@ -86,14 +88,25 @@ class CGNVCUDARuntime : public CGCUDARuntime {
   /// the start of the string.  The result of this function can be used anywhere
   /// where the C code specifies const char*.
   llvm::Constant *makeConstantString(const std::string &Str,
-                                     const std::string &Name = "",
-                                     const std::string &SectionName = "",
-                                     unsigned Alignment = 0) {
-    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
-                               llvm::ConstantInt::get(SizeTy, 0)};
+                                     const std::string &Name = "") {
     auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
-    llvm::GlobalVariable *GV =
-        cast<llvm::GlobalVariable>(ConstStr.getPointer());
+    return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
+                                                ConstStr.getPointer(), Zeros);
+  }
+
+  /// Helper function which generates an initialized constant array from Str,
+  /// and optionally sets section name and alignment. AddNull specifies whether
+  /// the array should nave NUL termination.
+  llvm::Constant *makeConstantArray(StringRef Str,
+                                    StringRef Name = "",
+                                    StringRef SectionName = "",
+                                    unsigned Alignment = 0,
+                                    bool AddNull = false) {
+    llvm::Constant *Value =
+        llvm::ConstantDataArray::getString(Context, Str, AddNull);
+    auto *GV = new llvm::GlobalVariable(
+        TheModule, Value->getType(), /*isConstant=*/true,
+        llvm::GlobalValue::PrivateLinkage, Value, Name);
     if (!SectionName.empty()) {
       GV->setSection(SectionName);
       // Mark the address as used which make sure that this section isn't
@@ -102,9 +115,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
     }
     if (Alignment)
       GV->setAlignment(llvm::Align(Alignment));
-
-    return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
-                                                ConstStr.getPointer(), Zeros);
+    return llvm::ConstantExpr::getGetElementPtr(GV->getValueType(), GV, Zeros);
   }
 
   /// Helper function that generates an empty dummy function returning void.
@@ -220,6 +231,8 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
   IntTy = CGM.IntTy;
   SizeTy = CGM.SizeTy;
   VoidTy = CGM.VoidTy;
+  Zeros[0] = llvm::ConstantInt::get(SizeTy, 0);
+  Zeros[1] = Zeros[0];
 
   CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
   VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
@@ -744,9 +757,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
       // If fatbin is available from early finalization, create a string
       // literal containing the fat binary loaded from the given file.
       const unsigned HIPCodeObjectAlign = 4096;
-      FatBinStr =
-          makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
-                             FatbinConstantName, HIPCodeObjectAlign);
+      FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+                                    FatbinConstantName, HIPCodeObjectAlign);
     } else {
       // If fatbin is not available, create an external symbol
       // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
@@ -780,8 +792,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
 
     // For CUDA, create a string literal containing the fat binary loaded from
     // the given file.
-    FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
-                                   FatbinConstantName, 8);
+    FatBinStr = makeConstantArray(std::string(CudaGpuBinary->getBuffer()), "",
+                                  FatbinConstantName, 8);
     FatMagic = CudaFatMagic;
   }
 
@@ -888,8 +900,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
     SmallString<64> ModuleID;
     llvm::raw_svector_ostream OS(ModuleID);
     OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
-    llvm::Constant *ModuleIDConstant = makeConstantString(
-        std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
+    llvm::Constant *ModuleIDConstant = makeConstantArray(
+        std::string(ModuleID.str()), "", ModuleIDSectionName, 32, /*AddNull=*/true);
 
     // Create an alias for the FatbinWrapper that nvcc will look for.
     llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,

diff  --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 0f925a29c215..8605df8a3770 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -1,4 +1,4 @@
-// RUN: echo "GPU binary would be here" > %t
+// RUN: echo -n "GPU binary would be here." > %t
 // RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=8.0 -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s \
@@ -159,8 +159,8 @@ __device__ void device_use() {
 // ALL: @3 = private unnamed_addr constant [19 x i8] c"ext_device_var_def\00"
 // ALL: @4 = private unnamed_addr constant [21 x i8] c"ext_constant_var_def\00"
 // * constant unnamed string with GPU binary
-// CUDA: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",
-// HIPEF: @[[FATBIN:.*]] = private constant{{.*GPU binary would be here.*}}\00",{{.*}}align 4096
+// CUDA: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",
+// HIPEF: @[[FATBIN:.*]] = private constant{{.*}} c"GPU binary would be here.",{{.*}}align 4096
 // HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
 // CUDANORDC-SAME: section ".nv_fatbin", align 8
 // CUDARDC-SAME: section "__nv_relfatbin", align 8