[clang] 98ab43a - [HIP] Fix device only linking for -fgpu-rdc
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 10 14:38:27 PST 2022
Author: Yaxun (Sam) Liu
Date: 2022-01-10T17:38:02-05:00
New Revision: 98ab43a1d209875ce9cc39420d3e6af57ed0f5b2
URL: https://github.com/llvm/llvm-project/commit/98ab43a1d209875ce9cc39420d3e6af57ed0f5b2
DIFF: https://github.com/llvm/llvm-project/commit/98ab43a1d209875ce9cc39420d3e6af57ed0f5b2.diff
LOG: [HIP] Fix device only linking for -fgpu-rdc
Currently, when -fgpu-rdc is specified, the HIP toolchain always performs host
linking, even if --cuda-device-only is specified.
This patch fixes that: only device linking is performed when --cuda-device-only
is specified.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D116840
Added:
Modified:
clang/lib/Driver/Driver.cpp
clang/test/Driver/hip-phases.hip
clang/test/Driver/hip-toolchain-rdc-separate.hip
Removed:
################################################################################
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index bb7ccf7dd97eb..ac8438bb45a6a 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3110,7 +3110,7 @@ class OffloadingActionBuilder final {
// We will pass the device action as a host dependence, so we don't
// need to do anything else with them.
CudaDeviceActions.clear();
- return ABRT_Success;
+ return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
}
// By default, we produce an action for each device arch.
@@ -3143,6 +3143,7 @@ class OffloadingActionBuilder final {
assert(DeviceLinkerInputs.size() == GpuArchList.size() &&
"Linker inputs and GPU arch list sizes do not match.");
+ ActionList Actions;
// Append a new link action for each device.
unsigned I = 0;
for (auto &LI : DeviceLinkerInputs) {
@@ -3154,22 +3155,29 @@ class OffloadingActionBuilder final {
OffloadAction::DeviceDependences DeviceLinkDeps;
DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0],
GpuArchList[I], AssociatedOffloadKind);
- AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
- DeviceLinkAction->getType()));
+ Actions.push_back(C.MakeAction<OffloadAction>(
+ DeviceLinkDeps, DeviceLinkAction->getType()));
++I;
}
DeviceLinkerInputs.clear();
// Create a host object from all the device images by embedding them
- // in a fat binary.
+ // in a fat binary for mixed host-device compilation. For device-only
+ // compilation, creates a fat binary.
OffloadAction::DeviceDependences DDeps;
- auto *TopDeviceLinkAction =
- C.MakeAction<LinkJobAction>(AL, types::TY_Object);
- DDeps.add(*TopDeviceLinkAction, *ToolChains[0],
- nullptr, AssociatedOffloadKind);
-
- // Offload the host object to the host linker.
- AL.push_back(C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
+ if (!CompileDeviceOnly || !BundleOutput.hasValue() ||
+ BundleOutput.getValue()) {
+ auto *TopDeviceLinkAction = C.MakeAction<LinkJobAction>(
+ Actions,
+ CompileDeviceOnly ? types::TY_HIP_FATBIN : types::TY_Object);
+ DDeps.add(*TopDeviceLinkAction, *ToolChains[0], nullptr,
+ AssociatedOffloadKind);
+ // Offload the host object to the host linker.
+ AL.push_back(
+ C.MakeAction<OffloadAction>(DDeps, TopDeviceLinkAction->getType()));
+ } else {
+ AL.append(Actions);
+ }
}
Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); }
@@ -3556,15 +3564,18 @@ class OffloadingActionBuilder final {
return false;
}
- Action* makeHostLinkAction() {
- // Build a list of device linking actions.
- ActionList DeviceAL;
+ void appendDeviceLinkActions(ActionList &AL) {
for (DeviceActionBuilder *SB : SpecializedBuilders) {
if (!SB->isValid())
continue;
- SB->appendLinkDeviceActions(DeviceAL);
+ SB->appendLinkDeviceActions(AL);
}
+ }
+ Action *makeHostLinkAction() {
+ // Build a list of device linking actions.
+ ActionList DeviceAL;
+ appendDeviceLinkActions(DeviceAL);
if (DeviceAL.empty())
return nullptr;
@@ -3893,6 +3904,13 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
}
// Add a link action if necessary.
+
+ if (LinkerInputs.empty()) {
+ Arg *FinalPhaseArg;
+ if (getFinalPhase(Args, &FinalPhaseArg) == phases::Link)
+ OffloadBuilder.appendDeviceLinkActions(Actions);
+ }
+
if (!LinkerInputs.empty()) {
if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
LinkerInputs.push_back(Wrapper);
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 623299b13dd0c..93bf635dab63f 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -311,22 +311,36 @@
//
// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \
-// RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2 %s
+// RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-EM %s
//
-// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object, (host-[[T:hip]])
-// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object, (host-[[T]])
-// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object, (host-[[T]])
-// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object, (host-[[T]])
+// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \
+// RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
-// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH1:gfx803]])
+// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %T/obj1.o %T/obj2.o \
+// RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
+// RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
+
+// L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
+// RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
+// L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
+// RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object
+
+// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]])
// RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image
// RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]])
// RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
-// RL2-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
-// RL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
-
-// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T]])
-// RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
+// RL2-DEV-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, hip-fatbin, (device-[[T]])
+// RL2-DEV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, hip-fatbin
+// RL2-EM-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
+// RL2-EM-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
+// RL2-NB-NOT: linker
+// RL2-NB-NOT: offload
+
+// NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image, (host-[[T:hip]])
+// RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
+// RL2-DEV-NOT: linker
// Test one gpu architectures up to the preprocessor expansion output phase in device-only
// compilation mode. no bundle.
diff --git a/clang/test/Driver/hip-toolchain-rdc-separate.hip b/clang/test/Driver/hip-toolchain-rdc-separate.hip
index 698ee14e74dc9..a0cf0493832cd 100644
--- a/clang/test/Driver/hip-toolchain-rdc-separate.hip
+++ b/clang/test/Driver/hip-toolchain-rdc-separate.hip
@@ -88,47 +88,66 @@
// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \
// RUN: %T/a.o %T/b.o \
-// RUN: 2>&1 | FileCheck -check-prefix=LINK %s
+// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LINK-HOST-UNBUNDLE,LLD-TMP,LINK-BUNDLE,LINK-EMBED %s
-// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
-// LINK: "-unbundle" "-allow-missing-bundles"
+// RUN: %clang --hip-link -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \
+// RUN: %T/a.o %T/b.o --cuda-device-only \
+// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LLD-TMP,LINK-BUNDLE,LINK-NOEMBED %s
-// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
-// LINK: "-unbundle" "-allow-missing-bundles"
+// RUN: %clang --hip-link -### -target x86_64-linux-gnu \
+// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \
+// RUN: %T/a.o %T/b.o --cuda-device-only --no-gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=LINK,LLD-FIN,LINK-NOBUNDLE,LINK-NOEMBED %s
+
+// LINK-HOST-UNBUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// LINK-HOST-UNBUNDLE-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
+// LINK-HOST-UNBUNDLE-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs=[[A_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
+// LINK-HOST-UNBUNDLE: "-unbundle" "-allow-missing-bundles"
+
+// LINK-HOST-UNBUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// LINK-HOST-UNBUNDLE-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
+// LINK-HOST-UNBUNDLE-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs=[[B_OBJ_HOST:.*o]],{{.*o}},{{.*o}}"
+// LINK-HOST-UNBUNDLE: "-unbundle" "-allow-missing-bundles"
// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[A_O]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]"
-// LINK: "-unbundle" "-allow-missing-bundles"
+// LINK-SAME: "-inputs=[[A_O:.*a.o]]" "-outputs={{.*o}},[[A_BC1:.*o]],[[A_BC2:.*o]]"
+// LINK-SAME: "-unbundle" "-allow-missing-bundles"
// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
// LINK-SAME: "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
-// LINK-SAME: "-inputs=[[B_O]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]"
-// LINK: "-unbundle" "-allow-missing-bundles"
+// LINK-SAME: "-inputs=[[B_O:.*b.o]]" "-outputs={{.*o}},[[B_BC1:.*o]],[[B_BC2:.*o]]"
+// LINK-SAME: "-unbundle" "-allow-missing-bundles"
// LINK-NOT: "*.llvm-link"
// LINK-NOT: ".*opt"
// LINK-NOT: ".*llc"
// LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// LINK: "-plugin-opt=mcpu=gfx803"
-// LINK-SAME: "-o" "[[IMG_DEV1:.*.out]]" "[[A_BC1]]" "[[B_BC1]]"
+// LINK-SAME: "-plugin-opt=mcpu=gfx803"
+// LLD-TMP-SAME: "-o" "[[IMG_DEV1:.*.out]]"
+// LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx803]]"
+// LINK-SAME "[[A_BC1]]" "[[B_BC1]]"
// LINK-NOT: "*.llvm-link"
// LINK-NOT: ".*opt"
// LINK-NOT: ".*llc"
// LINK: {{".*lld.*"}} {{.*}} "-plugin-opt=-amdgpu-internalize-symbols"
-// LINK: "-plugin-opt=mcpu=gfx900"
-// LINK-SAME: "-o" "[[IMG_DEV2:.*.out]]" "[[A_BC2]]" "[[B_BC2]]"
-
-// LINK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
-// LINK-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
-// LINK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]"
-
-// LINK: {{".*llvm-mc.*"}} "-o" "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj"
-
-// LINK: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]"
-// LINK-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]"
+// LINK-SAME: "-plugin-opt=mcpu=gfx900"
+// LLD-TMP-SAME: "-o" "[[IMG_DEV2:.*.out]]"
+// LLD-FIN-SAME: "-o" "[[IMG_DEV1:a.out-.*gfx900]]"
+// LINK-SAME "[[A_BC2]]" "[[B_BC2]]"
+
+// LINK-BUNDLE: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
+// LINK-BUNDLE-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900"
+// LINK-BUNDLE-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*]]"
+// LINK-NOBUNDLE-NOT: {{".*clang-offload-bundler"}} "-type=o"
+
+// LINK-EMBED: {{".*llvm-mc.*"}} "-o" "[[OBJBUNDLE:.*o]]" "{{.*}}.mcin" "--filetype=obj"
+// LINK-NOEMBED-NOT: {{".*llvm-mc.*"}} "-o"
+
+// LINK-EMBED: [[LD:".*ld.*"]] {{.*}} "-o" "a.out" {{.*}} "[[A_OBJ_HOST]]"
+// LINK-EMBED-SAME: "[[B_OBJ_HOST]]" "[[OBJBUNDLE]]"
+// LINK-NOEMBED-NOT: {{".*ld.*"}} {{.*}} "-o" "a.out"
More information about the cfe-commits
mailing list