[clang] afcc6ba - [clang][HIP] Updating driver to enable archive/bitcode to bitcode linking when targeting HIPAMD toolchain
Jacob Lambert via cfe-commits
cfe-commits at lists.llvm.org
Thu Apr 21 09:24:44 PDT 2022
Author: Jacob Lambert
Date: 2022-04-21T09:24:33-07:00
New Revision: afcc6baac52fcc91d1636f6803f5c230e7018016
URL: https://github.com/llvm/llvm-project/commit/afcc6baac52fcc91d1636f6803f5c230e7018016
DIFF: https://github.com/llvm/llvm-project/commit/afcc6baac52fcc91d1636f6803f5c230e7018016.diff
LOG: [clang][HIP] Updating driver to enable archive/bitcode to bitcode linking when targeting HIPAMD toolchain
Differential Revision: https://reviews.llvm.org/D124151
Added:
clang/test/Driver/hip-link-bc-to-bc.hip
Modified:
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/HIPAMD.cpp
clang/lib/Driver/ToolChains/HIPAMD.h
clang/test/Driver/hip-phases.hip
Removed:
################################################################################
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index c2f33f2970e1f..0e0d2b47bd663 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3065,7 +3065,7 @@ class OffloadingActionBuilder final {
// amdgcn does not support linking of object files, therefore we skip
// backend and assemble phases to output LLVM IR. Except for generating
- // non-relocatable device coee, where we generate fat binary for device
+ // non-relocatable device code, where we generate fat binary for device
// code and pass to host in Backend phase.
if (CudaDeviceActions.empty())
return ABRT_Success;
@@ -3074,7 +3074,7 @@ class OffloadingActionBuilder final {
CudaDeviceActions.size() == GpuArchList.size()) &&
"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&
- "Not expecting CUDA actions in host-only compilation.");
+ "Not expecting HIP actions in host-only compilation.");
if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM &&
!EmitAsm) {
@@ -3203,12 +3203,16 @@ class OffloadingActionBuilder final {
"Linker inputs and GPU arch list sizes do not match.");
ActionList Actions;
- // Append a new link action for each device.
unsigned I = 0;
+ // Append a new link action for each device.
+ // Each entry in DeviceLinkerInputs corresponds to a GPU arch.
for (auto &LI : DeviceLinkerInputs) {
- // Each entry in DeviceLinkerInputs corresponds to a GPU arch.
- auto *DeviceLinkAction =
- C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+
+ types::ID Output = Args.hasArg(options::OPT_emit_llvm)
+ ? types::TY_LLVM_BC
+ : types::TY_Image;
+
+ auto *DeviceLinkAction = C.MakeAction<LinkJobAction>(LI, Output);
// Linking all inputs for the current GPU arch.
// LI contains all the inputs for the linker.
OffloadAction::DeviceDependences DeviceLinkDeps;
@@ -3220,6 +3224,12 @@ class OffloadingActionBuilder final {
}
DeviceLinkerInputs.clear();
+ // If emitting LLVM, do not generate final host/device compilation action
+ if (Args.hasArg(options::OPT_emit_llvm)) {
+ AL.append(Actions);
+ return;
+ }
+
// Create a host object from all the device images by embedding them
// in a fat binary for mixed host-device compilation. For device-only
// compilation, creates a fat binary.
@@ -3747,7 +3757,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);
if (FinalPhase == phases::Link) {
- if (Args.hasArg(options::OPT_emit_llvm))
+ // Emitting LLVM while linking is disabled, except in the HIPAMD toolchain.
+ if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
if (IsCLMode() && LTOMode != LTOK_None &&
!Args.getLastArgValue(options::OPT_fuse_ld_EQ)
@@ -3932,7 +3943,10 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
// Queue linker inputs.
if (Phase == phases::Link) {
assert(Phase == PL.back() && "linking must be final compilation step.");
- LinkerInputs.push_back(Current);
+ // We don't need to generate additional link commands if emitting AMD bitcode
+ if (!(C.getInputArgs().hasArg(options::OPT_hip_link) &&
+ (C.getInputArgs().hasArg(options::OPT_emit_llvm))))
+ LinkerInputs.push_back(Current);
Current = nullptr;
break;
}
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 9f0ac6294e607..f672d2a108cc9 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -72,6 +72,36 @@ static bool shouldSkipSanitizeOption(const ToolChain &TC,
return false;
}
+// Build the llvm-link job used for bitcode-to-bitcode device linking: every
+// input is handed to llvm-link and the result is written to Output as bitcode.
+void AMDGCN::Linker::constructLlvmLinkCommand(Compilation &C,
+                                              const JobAction &JA,
+                                              const InputInfoList &Inputs,
+                                              const InputInfo &Output,
+                                              const llvm::opt::ArgList &Args) const {
+  ArgStringList LlvmLinkArgs;
+
+  assert(!Inputs.empty() && "Must have at least one input.");
+
+  LlvmLinkArgs.append({"-o", Output.getFilename()});
+  for (const auto &Input : Inputs)
+    LlvmLinkArgs.push_back(Input.getFilename());
+
+  // Look for archive of bundled bitcode in arguments, and add temporary files
+  // for the extracted archive of bitcode to inputs.
+  auto TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
+  AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, LlvmLinkArgs, "amdgcn",
+                             TargetID,
+                             /*IsBitCodeSDL=*/true,
+                             /*PostClangLink=*/false);
+
+  const char *LlvmLink =
+      Args.MakeArgString(getToolChain().GetProgramPath("llvm-link"));
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         LlvmLink, LlvmLinkArgs, Inputs,
+                                         Output));
+}
+
void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs,
const InputInfo &Output,
@@ -135,7 +165,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
}
// For amdgcn the inputs of the linker job are device bitcode and output is
-// object file. It calls llvm-link, opt, llc, then lld steps.
+// either an object file or bitcode (-emit-llvm). It calls llvm-link, opt,
+// llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@@ -151,6 +182,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
return HIP::constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs,
Args, *this);
+ if (JA.getType() == types::TY_LLVM_BC)
+ return constructLlvmLinkCommand(C, JA, Inputs, Output, Args);
+
return constructLldCommand(C, JA, Inputs, Output, Args);
}
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index cc472a595db98..25d4a998e5007 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -36,6 +36,10 @@ class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
void constructLldCommand(Compilation &C, const JobAction &JA,
const InputInfoList &Inputs, const InputInfo &Output,
const llvm::opt::ArgList &Args) const;
+ void constructLlvmLinkCommand(Compilation &C, const JobAction &JA,
+ const InputInfoList &Inputs,
+ const InputInfo &Output,
+ const llvm::opt::ArgList &Args) const;
};
} // end namespace AMDGCN
diff --git a/clang/test/Driver/hip-link-bc-to-bc.hip b/clang/test/Driver/hip-link-bc-to-bc.hip
new file mode 100644
index 0000000000000..42a539bd3cdd8
--- /dev/null
+++ b/clang/test/Driver/hip-link-bc-to-bc.hip
@@ -0,0 +1,34 @@
+// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target
+
+// Check that clang unbundles the two bitcodes and links via llvm-link
+// RUN: touch %T/bundle1.bc
+// RUN: touch %T/bundle2.bc
+
+// RUN: %clang -### --offload-arch=gfx906 --hip-link \
+// RUN: -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN: %T/bundle1.bc %T/bundle2.bc \
+// RUN: 2>&1 | FileCheck -check-prefix=BITCODE %s
+
+// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle1.bc" "-output=[[B1HOST:.*\.bc]]" "-output=[[B1DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B1DEV2:.*bundle1-gfx906.bc]]" "-x" "ir" "[[B1DEV1]]"
+
+// BITCODE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle2.bc" "-output=[[B2HOST:.*\.bc]]" "-output=[[B2DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// BITCODE: "{{.*}}clang-{{.*}}" "-o" "[[B2DEV2:.*bundle2-gfx906.bc]]" "-x" "ir" "[[B2DEV1]]"
+
+// BITCODE: "{{.*}}llvm-link" "-o" "bundle1-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[B1DEV2]]" "[[B2DEV2]]"
+
+// Check that clang unbundles the bitcode and archive and links via llvm-link
+// RUN: touch %T/libhipbundle.a
+// RUN: touch %T/bundle.bc
+
+// RUN: %clang -### --offload-arch=gfx906 --hip-link \
+// RUN: -emit-llvm -fgpu-rdc --cuda-device-only \
+// RUN: %T/bundle.bc -L%T -lhipbundle \
+// RUN: 2>&1 | FileCheck -check-prefix=ARCHIVE %s
+
+// ARCHIVE: "{{.*}}clang-offload-bundler" "-type=bc" "-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa-gfx906" "-input={{.*}}bundle.bc" "-output=[[HOST:.*\.bc]]" "-output=[[DEV1:.*\.bc]]" "-unbundle" "-allow-missing-bundles"
+// ARCHIVE: "{{.*}}clang-{{.*}}" "-o" "[[DEV2:.*\.bc]]" "-x" "ir" "[[DEV1]]"
+
+// ARCHIVE: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}libhipbundle.a" "-targets=hip-amdgcn-amd-amdhsa-gfx906" "-output=[[AR:.*\.a]]" "-allow-missing-bundles" "-hip-openmp-compatible"
+
+// ARCHIVE: "{{.*}}llvm-link" "-o" "bundle-hip-amdgcn-amd-amdhsa-gfx906.bc" "[[DEV2]]" "[[AR]]"
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 047322b963d27..808a1372d652e 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -520,3 +520,25 @@
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
// MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
// MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
+
+// Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
+// output should be unbundled linked bitcode
+
+// RUN: touch %T/bitcodeA.bc
+// RUN: touch %T/bitcodeB.bc
+// RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
+// RUN: --offload-arch=gfx906 %T/bitcodeA.bc %T/bitcodeB.bc 2>&1 \
+// RUN: | FileCheck -check-prefixes=CHECK %s
+
+// CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
+// CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
+// CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
+// CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
+
+// CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
+// CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
+// CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
+// CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
+
+// CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
+// CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
More information about the cfe-commits
mailing list