[clang] cbc4bbb - [HIP] Ignore host linker flags for device-only
Siu Chi Chan via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 17 13:29:30 PDT 2023
Author: Siu Chi Chan
Date: 2023-07-17T16:29:15-04:00
New Revision: cbc4bbb85c729f34bf0cba84ccd2e116af1454f5
URL: https://github.com/llvm/llvm-project/commit/cbc4bbb85c729f34bf0cba84ccd2e116af1454f5
DIFF: https://github.com/llvm/llvm-project/commit/cbc4bbb85c729f34bf0cba84ccd2e116af1454f5.diff
LOG: [HIP] Ignore host linker flags for device-only
When compiling in device-only mode (e.g. --offload-device-only), the
host linker phase does not happen; therefore, the driver should
ignore all host linker flags.
Differential Revision: https://reviews.llvm.org/D154881
Change-Id: I8244acef5c33108cf15b1dbb188f974f30099718
Added:
Modified:
clang/lib/Driver/Driver.cpp
clang/test/Driver/hip-phases.hip
Removed:
################################################################################
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index b4a070587901fb..211a65e4cde628 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4146,9 +4146,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Queue linker inputs.
if (Phase == phases::Link) {
assert(Phase == PL.back() && "linking must be final compilation step.");
- // We don't need to generate additional link commands if emitting AMD bitcode
+ // We don't need to generate additional link commands if emitting AMD
+ // bitcode or compiling only for the offload device
if (!(C.getInputArgs().hasArg(options::OPT_hip_link) &&
- (C.getInputArgs().hasArg(options::OPT_emit_llvm))))
+ (C.getInputArgs().hasArg(options::OPT_emit_llvm))) &&
+ !offloadDeviceOnly())
LinkerInputs.push_back(Current);
Current = nullptr;
break;
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 063a16a7939ec6..e976583820ccf7 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -219,6 +219,14 @@
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefixes=DBIN %s
+//
+// Test single gpu architecture with complete compilation in device-only
+// compilation mode with an unused host linker flag.
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
+// RUN: | FileCheck -check-prefixes=DBIN %s
+
// DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
// DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -229,6 +237,7 @@
// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
// DBIN-NOT: host
+
//
// Test single gpu architecture up to the assemble phase in device-only
// compilation mode.
@@ -251,6 +260,11 @@
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
// RUN: | FileCheck -check-prefixes=RELOC %s
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
+// RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
+//
// RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
// RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -258,6 +272,7 @@
// RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
// RELOC-NOT: linker
// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
+// RELOC-NOT: host
//
// Test two gpu architectures with compile to relocatable in device-only
@@ -266,6 +281,11 @@
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
// RUN: | FileCheck -check-prefixes=RELOC2 %s
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable \
+// RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=RELOC2 %s
+//
// RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
// RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -280,6 +300,7 @@
// RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
// RELOC2-NOT: linker
// RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
+// RELOC2-NOT: host
//
// Test two gpu architectures with complete compilation in device-only
@@ -288,6 +309,14 @@
// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
// RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
+//
+// Test two gpu architectures with complete compilation in device-only
+// compilation mode with an unused host linker flag.
+//
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
+// RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=DBIN2 %s
+
// DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
// DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
// DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
@@ -305,6 +334,7 @@
// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
// DBIN2-NOT: host
+
//
// Test two gpu architectures up to the assemble phase in device-only
// compilation mode.
@@ -357,11 +387,21 @@
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
// RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
+// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
+// RUN: -fgpu-rdc --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
+// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
+
// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
+// RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
+// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output -Wl,--disable-new-dtags 2>&1 \
+// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
+
// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
@@ -381,6 +421,7 @@
// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image
// RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
// RL2-DEV-NOT: linker
+// RL2-NB-NOT: host
// Test one gpu architectures up to the preprocessor expansion output phase in device-only
// compilation mode. no bundle.
More information about the cfe-commits
mailing list