[clang] [HIP] diagnose -mwavefrontsize64 for gfx10+ (PR #140185)
via cfe-commits
cfe-commits at lists.llvm.org
Thu May 15 21:10:19 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Yaxun (Sam) Liu (yxsamliu)
<details>
<summary>Changes</summary>
wavefront size 64 is not fully supported for HIP on gfx10+ and it is not tested. As such, currently HIP header contains an pragma error to diagnose such use case by detecting predefined macro `__AMDGCN_WAVEFRONT_SIZE`. However, since clang will remove it and replace it with a builtin function, there is no good way to diagnose such invalid use of wavefront size 64 with gfx10+ in HIP header. Therefore this patch diagnoses uses of -mwavefrontsize64 with gfx10+ in HIPAMD toolchain.
This patch also introduces option `-mforce-unsafe-wavefrontsize64` to allow power users to force wavefront size 64 on gfx10+, with an assumption that they understand its risk.
---
Full diff: https://github.com/llvm/llvm-project/pull/140185.diff
7 Files Affected:
- (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+3)
- (modified) clang/include/clang/Driver/Options.td (+2)
- (modified) clang/lib/Driver/ToolChains/AMDGPU.cpp (+25-11)
- (modified) clang/lib/Driver/ToolChains/AMDGPU.h (+7-2)
- (modified) clang/lib/Driver/ToolChains/HIPAMD.cpp (+3)
- (modified) clang/test/Driver/hip-macros.hip (+2-2)
- (modified) clang/test/Driver/hip-toolchain-features.hip (+7-1)
``````````diff
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 4da8f80345ddc..42c6f11e5cbdb 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -200,6 +200,9 @@ def err_drv_opt_unsupported_input_type : Error<
"'%0' invalid for input of type %1">;
def err_drv_amdgpu_ieee_without_no_honor_nans : Error<
"invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">;
+def err_drv_wave64_requires_force_unsafe : Error<
+ "'-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture "
+ "gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override">;
def err_drv_argument_not_allowed_with : Error<
"invalid argument '%0' not allowed with '%1'">;
def err_drv_cannot_open_randomize_layout_seed_file : Error<
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index bd8df8f6a749a..41da9c3bff3bf 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5257,6 +5257,8 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
"Specify wavefront size 64", "Specify wavefront size 32",
" mode (AMDGPU only)">;
+defm force_unsafe_wavefrontsize64 : SimpleMFlag<"force-unsafe-wavefrontsize64",
+ "Force", "Do not force", " wavefront size 64 for gfx10+ GPUs (AMDGPU only)">;
defm amdgpu_precise_memory_op
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
" precise memory mode (AMDGPU only)">;
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 35ca019795ddc..d7ce1ca1ca4e7 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -606,6 +606,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
// Add target ID features to -target-feature options. No diagnostics should
// be emitted here since invalid target ID is diagnosed at other places.
StringRef TargetID;
+ StringRef GpuArch;
if (Args.hasArg(options::OPT_mcpu_EQ))
TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
else if (Args.hasArg(options::OPT_march_EQ))
@@ -614,7 +615,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
llvm::StringMap<bool> FeatureMap;
auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
if (OptionalGpuArch) {
- StringRef GpuArch = *OptionalGpuArch;
+ GpuArch = *OptionalGpuArch;
// Iterate through all possible target ID features for the given GPU.
// If it is mapped to true, add +feature.
// If it is mapped to false, add -feature.
@@ -628,9 +629,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
}
}
}
-
- if (Args.hasFlag(options::OPT_mwavefrontsize64,
- options::OPT_mno_wavefrontsize64, false))
+ if (toolchains::AMDGPUToolChain::isWave64(
+ D, Args, llvm::AMDGPU::parseArchAMDGCN(GpuArch),
+ /*DiagInvalid=*/false,
+ /*Explicit=*/true))
Features.push_back("+wavefrontsize64");
if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
@@ -767,15 +769,27 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
llvm::DenormalMode::getIEEE();
}
-bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
- llvm::AMDGPU::GPUKind Kind) {
+bool AMDGPUToolChain::isWave64(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ llvm::AMDGPU::GPUKind Kind, bool DiagInvalid,
+ bool Explicit) {
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
- return !HasWave32 || DriverArgs.hasFlag(
- options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
-}
+ bool EnableWave64 = DriverArgs.hasFlag(
+ options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
+ bool ForceUnsafeWave64 =
+ DriverArgs.hasFlag(options::OPT_mforce_unsafe_wavefrontsize64,
+ options::OPT_mno_force_unsafe_wavefrontsize64, false);
+
+ if (DiagInvalid && HasWave32 && EnableWave64 && !ForceUnsafeWave64)
+ D.Diag(diag::err_drv_wave64_requires_force_unsafe);
+ if (Explicit)
+ return EnableWave64 || ForceUnsafeWave64;
+
+ return !HasWave32 || EnableWave64 || ForceUnsafeWave64;
+}
/// ROCM Toolchain
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
@@ -884,7 +898,7 @@ void ROCMToolChain::addClangTargetOptions(
ABIVer))
return;
- bool Wave64 = isWave64(DriverArgs, Kind);
+ bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);
// TODO: There are way too many flags that change this. Do we need to check
// them all?
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
@@ -1012,7 +1026,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
bool CorrectSqrt = DriverArgs.hasFlag(
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
- bool Wave64 = isWave64(DriverArgs, Kind);
+ bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);
// GPU Sanitizer currently only supports ASan and is enabled through host
// ASan.
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 08bd4fa556f78..d9d0fac71eb91 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -89,8 +89,13 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
const llvm::fltSemantics *FPType = nullptr) const override;
- static bool isWave64(const llvm::opt::ArgList &DriverArgs,
- llvm::AMDGPU::GPUKind Kind);
+ /// Return whether the GPU of kind \p Kind has wavefront size 64 by driver
+ /// arguments \p DriverArgs. When \p Explicit is true, only return true
+ /// if wavefront size is set to 64 by arguments explicitly. When
+ /// \p DiagInvalid is true, diagnose invalid -mwavefrontsize64 arguments.
+ static bool isWave64(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+ llvm::AMDGPU::GPUKind Kind, bool DiagInvalid = false,
+ bool Explicit = false);
/// Needed for using lto.
bool HasNativeLLVMSupport() const override {
return true;
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index abda4eb453387..44f1a3e915283 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -229,6 +229,9 @@ void HIPAMDToolChain::addClangTargetOptions(
assert(DeviceOffloadingKind == Action::OFK_HIP &&
"Only HIP offloading kinds are supported for GPUs.");
+ const StringRef GpuArch = getGPUArch(DriverArgs);
+ auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+ (void)isWave64(getDriver(), DriverArgs, Kind, /*DiagInvalid=*/true);
CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip
index bd93f9985a774..37fdafc53af85 100644
--- a/clang/test/Driver/hip-macros.hip
+++ b/clang/test/Driver/hip-macros.hip
@@ -2,7 +2,7 @@
// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
-// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \
+// RUN: %clang -E -dM --offload-arch=gfx1010 -mforce-unsafe-wavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
@@ -16,7 +16,7 @@
// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \
// RUN: --cuda-device-only -nogpuinc -nogpulib \
-// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
+// RUN: -mforce-unsafe-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64
// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32
// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64
diff --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip
index dbc007ac1335b..1de21d9a0ff06 100644
--- a/clang/test/Driver/hip-toolchain-features.hip
+++ b/clang/test/Driver/hip-toolchain-features.hip
@@ -67,9 +67,15 @@
// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64"
// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode"
-// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
// RUN: -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \
+// RUN: | FileCheck %s -check-prefix=WAVE64-ERR
+// WAVE64-ERR: error: '-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override
+
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN: -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
+// RUN: -mno-wavefrontsize64 -mforce-unsafe-wavefrontsize64 2>&1 \
// RUN: | FileCheck %s -check-prefix=WAVE64
// WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64"
// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16"
``````````
</details>
https://github.com/llvm/llvm-project/pull/140185
More information about the cfe-commits
mailing list