[clang] 3b172f6 - [HIP] Fix -fgpu-rdc for Windows
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 6 13:42:45 PST 2021
Author: Yaxun (Sam) Liu
Date: 2021-12-06T16:42:23-05:00
New Revision: 3b172f60c6926b43f87de1eaa3fa4c873bdb6de4
URL: https://github.com/llvm/llvm-project/commit/3b172f60c6926b43f87de1eaa3fa4c873bdb6de4
DIFF: https://github.com/llvm/llvm-project/commit/3b172f60c6926b43f87de1eaa3fa4c873bdb6de4.diff
LOG: [HIP] Fix -fgpu-rdc for Windows
This patch fixes issues for -fgpu-rdc for Windows MSVC
toolchain:
Fix COFF specific section flags and remove section types
in llvm-mc input file for Windows.
Escape fatbin path in llvm-mc input file.
Add -triple option to llvm-mc.
Put __hip_gpubin_handle in comdat when it has linkonce_odr
linkage.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D115039
Added:
Modified:
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/Driver/ToolChains/HIP.cpp
clang/test/CodeGenCUDA/device-stub.cu
clang/test/Driver/hip-toolchain-rdc.hip
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index a1b4431ca8c43..c4e3f7f54f4f2 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -814,6 +814,9 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");
+ if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage)
+ GpuBinaryHandle->setComdat(
+ CGM.getModule().getOrInsertComdat(GpuBinaryHandle->getName()));
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
// Prevent the weak symbol in different shared libraries being merged.
if (Linkage != llvm::GlobalValue::InternalLinkage)
diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 07af1a0457c79..097cfaa771415 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -183,8 +183,7 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
const InputInfoList &Inputs, const ArgList &Args,
const JobAction &JA) const {
const ToolChain &TC = getToolChain();
- std::string Name =
- std::string(llvm::sys::path::stem(Output.getFilename()));
+ std::string Name = std::string(llvm::sys::path::stem(Output.getFilename()));
// Create Temp Object File Generator,
// Offload Bundled file and Bundled Object file.
@@ -206,20 +205,29 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
std::string ObjBuffer;
llvm::raw_string_ostream ObjStream(ObjBuffer);
+ auto HostTriple =
+ C.getSingleOffloadToolChain<Action::OFK_Host>()->getTriple();
+
// Add MC directives to embed target binaries. We ensure that each
// section and image is 16-byte aligned. This is not mandatory, but
// increases the likelihood of data to be aligned with a cache block
// in several main host machines.
ObjStream << "# HIP Object Generator\n";
ObjStream << "# *** Automatically generated by Clang ***\n";
- ObjStream << " .protected __hip_fatbin\n";
- ObjStream << " .type __hip_fatbin,@object\n";
- ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
+ if (HostTriple.isWindowsMSVCEnvironment()) {
+ ObjStream << " .section .hip_fatbin, \"dw\"\n";
+ } else {
+ ObjStream << " .protected __hip_fatbin\n";
+ ObjStream << " .type __hip_fatbin,@object\n";
+ ObjStream << " .section .hip_fatbin,\"a\",@progbits\n";
+ }
ObjStream << " .globl __hip_fatbin\n";
ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign))
<< "\n";
ObjStream << "__hip_fatbin:\n";
- ObjStream << " .incbin \"" << BundleFile << "\"\n";
+ ObjStream << " .incbin ";
+ llvm::sys::printArg(ObjStream, BundleFile, /*Quote=*/true);
+ ObjStream << "\n";
ObjStream.flush();
// Dump the contents of the temp object file gen if the user requested that.
@@ -238,7 +246,8 @@ void AMDGCN::Linker::constructGenerateObjFileFromHIPFatBinary(
Objf << ObjBuffer;
- ArgStringList McArgs{"-o", Output.getFilename(),
+ ArgStringList McArgs{"-triple", Args.MakeArgString(HostTriple.normalize()),
+ "-o", Output.getFilename(),
McinFile, "--filetype=obj"};
const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index 0de58e1ba02d8..462faad982439 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -57,8 +57,14 @@
// RUN: -fcuda-include-gpubinary %t -o - -x hip\
// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \
+// RUN: -o - -x hip\
+// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN,HIP,HIPNEF
+
#include "Inputs/cuda.h"
+// HIPNEF: $__hip_gpubin_handle = comdat any
+
#ifndef NOGLOBALS
// NORDC-DAG: @device_var = internal global i32
// RDC-DAG: @device_var = global i32
diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip
index abcc14538486b..7d06e39f6079f 100644
--- a/clang/test/Driver/hip-toolchain-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-rdc.hip
@@ -11,14 +11,31 @@
// RUN: -fhip-dump-offload-linker-script \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s
+
+// RUN: %clang -### -target x86_64-pc-windows-msvc \
+// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
+// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
+// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \
+// RUN: -fhip-dump-offload-linker-script \
+// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s
// check code object alignment in dumped llvm-mc input
-// CHECK: .protected __hip_fatbin
+// LNX: .protected __hip_fatbin
+// LNX: .type __hip_fatbin,@object
+// LNX: .section .hip_fatbin,"a",@progbits
+// MSVC: .section .hip_fatbin, "dw"
+// CHECK: .globl __hip_fatbin
// CHECK: .p2align 12
+// CHECK: __hip_fatbin:
+// CHECK: .incbin "[[BUNDLE:.*hipfb]]"
// emit objects for host side path
-// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" [[HOST:"x86_64-[^"]+"]]
// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
@@ -26,7 +43,7 @@
// CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip"
// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]
-// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// CHECK: [[CLANG]] "-cc1" "-triple" [[HOST]]
// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa"
// CHECK-SAME: "-emit-obj"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
@@ -36,7 +53,7 @@
// generate image for device side path on gfx803
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST:"x86_64-[^"]+"]]
// CHECK-SAME: "-emit-llvm-bc"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
@@ -48,7 +65,7 @@
// CHECK-SAME: {{.*}} [[A_SRC]]
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
// CHECK-SAME: "-emit-llvm-bc"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
// CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden"
@@ -68,7 +85,7 @@
// generate image for device side path on gfx900
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
// CHECK-SAME: "-emit-llvm-bc"
// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
// CHECK-SAME: "-fcuda-is-device"
@@ -78,7 +95,7 @@
// CHECK-SAME: {{.*}} [[A_SRC]]
// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
-// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// CHECK-SAME: "-aux-triple" [[HOST]]
// CHECK-SAME: "-emit-llvm-bc"
// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
// CHECK-SAME: "-fcuda-is-device"
@@ -99,9 +116,10 @@
// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// CHECK-SAME: "-bundle-align=4096"
// CHECK-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
-// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
+// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE]]"
-// CHECK: [[MC:".*llvm-mc.*"]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj"
+// CHECK: [[MC:".*llvm-mc.*"]] "-triple" [[HOST]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj"
// output the executable
-// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
+// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
+// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]
More information about the cfe-commits
mailing list