[clang] ccac6b2 - [hip] Properly populate macros based on host processor.
Michael Liao via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 4 12:37:54 PST 2020
Author: Michael Liao
Date: 2020-02-04T15:36:14-05:00
New Revision: ccac6b2bf877337a883c3763e41a529d8f9cc1ff
URL: https://github.com/llvm/llvm-project/commit/ccac6b2bf877337a883c3763e41a529d8f9cc1ff
DIFF: https://github.com/llvm/llvm-project/commit/ccac6b2bf877337a883c3763e41a529d8f9cc1ff.diff
LOG: [hip] Properly populate macros based on host processor.
Summary:
- The device compilation needs to see source code that is consistent with
the corresponding host compilation. If macros based on the
host-specific target processor are not properly populated, the device
compilation may fail because the source is inconsistent after
preprocessing. So far, only the host triple is used to build the
macros. If a detailed host CPU target or certain features are
specified, macros derived from them won't be populated properly, e.g.
`__SSE3__` won't be added unless the `+sse3` feature is present. In
MSVC-compatible compilation on Windows, those missing macros result
in intrinsics not being included, which causes device compilation
to fail on the host-side source.
- This patch addresses this issue by introducing two `cc1` options,
i.e., `-aux-target-cpu` and `-aux-target-feature`. If a specific host
CPU target or certain features are specified, the compiler driver will
append them during the construction of the offline compilation
actions. Then, the toolchain in `cc1` phase will populate macros
accordingly.
- An internal option `--gpu-use-aux-triple-only` is added to fall back
to the original behavior to help diagnose potential issues from the new
behavior.
Reviewers: tra, yaxunl
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D73942
Added:
clang/test/Driver/hip-host-cpu-features.hip
clang/test/Preprocessor/hip-host-cpu-macros.cu
Modified:
clang/include/clang/Driver/CC1Options.td
clang/include/clang/Driver/Options.td
clang/include/clang/Frontend/FrontendOptions.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/lib/Frontend/CompilerInstance.cpp
clang/lib/Frontend/CompilerInvocation.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td
index f535d86d9b5e..0d0b05f8961c 100644
--- a/clang/include/clang/Driver/CC1Options.td
+++ b/clang/include/clang/Driver/CC1Options.td
@@ -482,6 +482,10 @@ def cc1as : Flag<["-"], "cc1as">;
def ast_merge : Separate<["-"], "ast-merge">,
MetaVarName<"<ast file>">,
HelpText<"Merge the given AST file into the translation unit being compiled.">;
+def aux_target_cpu : Separate<["-"], "aux-target-cpu">,
+ HelpText<"Target a specific auxiliary cpu type">;
+def aux_target_feature : Separate<["-"], "aux-target-feature">,
+ HelpText<"Target specific auxiliary attributes">;
def aux_triple : Separate<["-"], "aux-triple">,
HelpText<"Auxiliary target triple.">;
def code_completion_at : Separate<["-"], "code-completion-at">,
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 388ff094ae44..2c925d018da7 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -549,6 +549,9 @@ def c : Flag<["-"], "c">, Flags<[DriverOption]>, Group<Action_Group>,
def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>, Flags<[CC1Option]>,
HelpText<"Assume functions may be convergent">;
+def gpu_use_aux_triple_only : Flag<["--"], "gpu-use-aux-triple-only">,
+ InternalDriverOpt, HelpText<"Prepare '-aux-triple' only without populating "
+ "'-aux-target-cpu' and '-aux-target-feature'.">;
def cuda_device_only : Flag<["--"], "cuda-device-only">,
HelpText<"Compile CUDA code for device only">;
def cuda_host_only : Flag<["--"], "cuda-host-only">,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 09969b596d63..2adc6319810c 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -426,9 +426,15 @@ class FrontendOptions {
/// (in the format produced by -fdump-record-layouts).
std::string OverrideRecordLayoutsFile;
- /// Auxiliary triple for CUDA compilation.
+ /// Auxiliary triple for CUDA/HIP compilation.
std::string AuxTriple;
+ /// Auxiliary target CPU for CUDA/HIP compilation.
+ Optional<std::string> AuxTargetCPU;
+
+ /// Auxiliary target features for CUDA/HIP compilation.
+ Optional<std::vector<std::string>> AuxTargetFeatures;
+
/// Filename to write statistics to.
std::string StatsFile;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 6f092ca274c0..ccdfbe8c604f 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -309,7 +309,7 @@ static void getWebAssemblyTargetFeatures(const ArgList &Args,
static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
const ArgList &Args, ArgStringList &CmdArgs,
- bool ForAS) {
+ bool ForAS, bool IsAux = false) {
const Driver &D = TC.getDriver();
std::vector<StringRef> Features;
switch (Triple.getArch()) {
@@ -387,7 +387,7 @@ static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
if (Last != I)
continue;
- CmdArgs.push_back("-target-feature");
+ CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
CmdArgs.push_back(Name.data());
}
}
@@ -4627,6 +4627,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
AsynchronousUnwindTables))
CmdArgs.push_back("-munwind-tables");
+ // Prepare `-aux-target-cpu` and `-aux-target-feature` unless
+ // `--gpu-use-aux-triple-only` is specified.
+ if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
+ ((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
+ (IsHIP && JA.isDeviceOffloading(Action::OFK_HIP)))) {
+ const ArgList &HostArgs =
+ C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
+ std::string HostCPU =
+ getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/ false);
+ if (!HostCPU.empty()) {
+ CmdArgs.push_back("-aux-target-cpu");
+ CmdArgs.push_back(Args.MakeArgString(HostCPU));
+ }
+ getTargetFeatures(TC, *TC.getAuxTriple(), HostArgs, CmdArgs,
+ /*ForAS*/ false, /*IsAux*/ true);
+ }
+
TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
// FIXME: Handle -mtune=.
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 071523219eeb..0db8df0fada8 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -923,6 +923,10 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
!getFrontendOpts().AuxTriple.empty()) {
auto TO = std::make_shared<TargetOptions>();
TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple);
+ if (getFrontendOpts().AuxTargetCPU)
+ TO->CPU = getFrontendOpts().AuxTargetCPU.getValue();
+ if (getFrontendOpts().AuxTargetFeatures)
+ TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.getValue();
TO->HostTriple = getTarget().getTriple().str();
setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
}
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 319f0d58ec4a..2fd66b6f546b 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1931,6 +1931,10 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
Opts.OverrideRecordLayoutsFile =
std::string(Args.getLastArgValue(OPT_foverride_record_layout_EQ));
Opts.AuxTriple = std::string(Args.getLastArgValue(OPT_aux_triple));
+ if (Args.hasArg(OPT_aux_target_cpu))
+ Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu));
+ if (Args.hasArg(OPT_aux_target_feature))
+ Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature);
Opts.StatsFile = std::string(Args.getLastArgValue(OPT_stats_file));
if (const Arg *A = Args.getLastArg(OPT_arcmt_check,
diff --git a/clang/test/Driver/hip-host-cpu-features.hip b/clang/test/Driver/hip-host-cpu-features.hip
new file mode 100644
index 000000000000..efadd8bed22c
--- /dev/null
+++ b/clang/test/Driver/hip-host-cpu-features.hip
@@ -0,0 +1,19 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU
+// RUN: %clang -### -c -target x86_64-linux-gnu -msse3 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3
+// RUN: %clang -### -c -target x86_64-linux-gnu --gpu-use-aux-triple-only -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=NOHOSTCPU
+
+// HOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// HOSTCPU-SAME: "-aux-target-cpu" "znver2"
+
+// HOSTSSE3: "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// HOSTSSE3-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// HOSTSSE3-SAME: "-aux-target-feature" "+sse3"
+
+// NOHOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// NOHOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// NOHOSTCPU-NOT: "-aux-target-cpu" "znver2"
diff --git a/clang/test/Preprocessor/hip-host-cpu-macros.cu b/clang/test/Preprocessor/hip-host-cpu-macros.cu
new file mode 100644
index 000000000000..efec439c3656
--- /dev/null
+++ b/clang/test/Preprocessor/hip-host-cpu-macros.cu
@@ -0,0 +1,13 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+#ifdef __HIP_DEVICE_COMPILE__
+DEVICE __SSE3__
+#else
+HOST __SSE3__
+#endif
+
+// RUN: %clang -x hip -E -target x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib -o - %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: SSE3
More information about the cfe-commits
mailing list