[clang] [HIP] diagnose -mwavefrontsize64 for gfx10+ (PR #140185)

Thu May 15 21:10:19 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Yaxun (Sam) Liu (yxsamliu)

<details>
<summary>Changes</summary>

wavefront size 64 is not fully supported for HIP on gfx10+ and it is not tested. As such, currently HIP header contains an pragma error to diagnose such use case by detecting predefined macro `__AMDGCN_WAVEFRONT_SIZE`. However, since clang will remove it and replace it with a builtin function, there is no good way to diagnose such invalid use of wavefront size 64 with gfx10+ in HIP header. Therefore this patch diagnoses uses of -mwavefrontsize64 with gfx10+ in HIPAMD toolchain.

This patch also introduces option `-mforce-unsafe-wavefrontsize64` to allow power users to force wavefront size 64 on gfx10+, with an assumption that they understand its risk.

---
Full diff: https://github.com/llvm/llvm-project/pull/140185.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+3) 
- (modified) clang/include/clang/Driver/Options.td (+2) 
- (modified) clang/lib/Driver/ToolChains/AMDGPU.cpp (+25-11) 
- (modified) clang/lib/Driver/ToolChains/AMDGPU.h (+7-2) 
- (modified) clang/lib/Driver/ToolChains/HIPAMD.cpp (+3) 
- (modified) clang/test/Driver/hip-macros.hip (+2-2) 
- (modified) clang/test/Driver/hip-toolchain-features.hip (+7-1) 


``````````diff

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 4da8f80345ddc..42c6f11e5cbdb 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -200,6 +200,9 @@ def err_drv_opt_unsupported_input_type : Error<
   "'%0' invalid for input of type %1">;
 def err_drv_amdgpu_ieee_without_no_honor_nans : Error<
   "invalid argument '-mno-amdgpu-ieee' only allowed with relaxed NaN handling">;
+def err_drv_wave64_requires_force_unsafe : Error<
+  "'-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture "
+  "gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override">;
 def err_drv_argument_not_allowed_with : Error<
   "invalid argument '%0' not allowed with '%1'">;
 def err_drv_cannot_open_randomize_layout_seed_file : Error<
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index bd8df8f6a749a..41da9c3bff3bf 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5257,6 +5257,8 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
 defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
   "Specify wavefront size 64", "Specify wavefront size 32",
   " mode (AMDGPU only)">;
+defm force_unsafe_wavefrontsize64 : SimpleMFlag<"force-unsafe-wavefrontsize64",
+  "Force", "Do not force", " wavefront size 64 for gfx10+ GPUs (AMDGPU only)">;
 defm amdgpu_precise_memory_op
     : SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
                   " precise memory mode (AMDGPU only)">;
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 35ca019795ddc..d7ce1ca1ca4e7 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -606,6 +606,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
   // Add target ID features to -target-feature options. No diagnostics should
   // be emitted here since invalid target ID is diagnosed at other places.
   StringRef TargetID;
+  StringRef GpuArch;
   if (Args.hasArg(options::OPT_mcpu_EQ))
     TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
   else if (Args.hasArg(options::OPT_march_EQ))
@@ -614,7 +615,7 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
     llvm::StringMap<bool> FeatureMap;
     auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
     if (OptionalGpuArch) {
-      StringRef GpuArch = *OptionalGpuArch;
+      GpuArch = *OptionalGpuArch;
       // Iterate through all possible target ID features for the given GPU.
       // If it is mapped to true, add +feature.
       // If it is mapped to false, add -feature.
@@ -628,9 +629,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
       }
     }
   }
-
-  if (Args.hasFlag(options::OPT_mwavefrontsize64,
-                   options::OPT_mno_wavefrontsize64, false))
+  if (toolchains::AMDGPUToolChain::isWave64(
+          D, Args, llvm::AMDGPU::parseArchAMDGCN(GpuArch),
+          /*DiagInvalid=*/false,
+          /*Explicit=*/true))
     Features.push_back("+wavefrontsize64");
 
   if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
@@ -767,15 +769,27 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
                llvm::DenormalMode::getIEEE();
 }
 
-bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
-                               llvm::AMDGPU::GPUKind Kind) {
+bool AMDGPUToolChain::isWave64(const Driver &D,
+                               const llvm::opt::ArgList &DriverArgs,
+                               llvm::AMDGPU::GPUKind Kind, bool DiagInvalid,
+                               bool Explicit) {
   const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
   bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
 
-  return !HasWave32 || DriverArgs.hasFlag(
-    options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
-}
+  bool EnableWave64 = DriverArgs.hasFlag(
+      options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
+  bool ForceUnsafeWave64 =
+      DriverArgs.hasFlag(options::OPT_mforce_unsafe_wavefrontsize64,
+                         options::OPT_mno_force_unsafe_wavefrontsize64, false);
+
+  if (DiagInvalid && HasWave32 && EnableWave64 && !ForceUnsafeWave64)
+    D.Diag(diag::err_drv_wave64_requires_force_unsafe);
 
+  if (Explicit)
+    return EnableWave64 || ForceUnsafeWave64;
+
+  return !HasWave32 || EnableWave64 || ForceUnsafeWave64;
+}
 
 /// ROCM Toolchain
 ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
@@ -884,7 +898,7 @@ void ROCMToolChain::addClangTargetOptions(
                                                 ABIVer))
     return;
 
-  bool Wave64 = isWave64(DriverArgs, Kind);
+  bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);
   // TODO: There are way too many flags that change this. Do we need to check
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
@@ -1012,7 +1026,7 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
   bool CorrectSqrt = DriverArgs.hasFlag(
       options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
       options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
-  bool Wave64 = isWave64(DriverArgs, Kind);
+  bool Wave64 = isWave64(getDriver(), DriverArgs, Kind);
 
   // GPU Sanitizer currently only supports ASan and is enabled through host
   // ASan.
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 08bd4fa556f78..d9d0fac71eb91 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -89,8 +89,13 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
       const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const override;
 
-  static bool isWave64(const llvm::opt::ArgList &DriverArgs,
-                       llvm::AMDGPU::GPUKind Kind);
+  /// Return whether the GPU of kind \p Kind has wavefront size 64 by driver
+  /// arguments \p DriverArgs. When \p Explicit is true, only return true
+  /// if wavefront size is set to 64 by arguments explicitly. When
+  /// \p DiagInvalid is true, diagnose invalid -mwavefrontsize64 arguments.
+  static bool isWave64(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+                       llvm::AMDGPU::GPUKind Kind, bool DiagInvalid = false,
+                       bool Explicit = false);
   /// Needed for using lto.
   bool HasNativeLLVMSupport() const override {
     return true;
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index abda4eb453387..44f1a3e915283 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -229,6 +229,9 @@ void HIPAMDToolChain::addClangTargetOptions(
 
   assert(DeviceOffloadingKind == Action::OFK_HIP &&
          "Only HIP offloading kinds are supported for GPUs.");
+  const StringRef GpuArch = getGPUArch(DriverArgs);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+  (void)isWave64(getDriver(), DriverArgs, Kind, /*DiagInvalid=*/true);
 
   CC1Args.append({"-fcuda-is-device", "-fno-threadsafe-statics"});
 
diff --git a/clang/test/Driver/hip-macros.hip b/clang/test/Driver/hip-macros.hip
index bd93f9985a774..37fdafc53af85 100644
--- a/clang/test/Driver/hip-macros.hip
+++ b/clang/test/Driver/hip-macros.hip
@@ -2,7 +2,7 @@
 // RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
 // RUN:   --cuda-device-only -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
-// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \
+// RUN: %clang -E -dM --offload-arch=gfx1010 -mforce-unsafe-wavefrontsize64 \
 // RUN:   --cuda-device-only -nogpuinc -nogpulib \
 // RUN:   %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
 // RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \
@@ -16,7 +16,7 @@
 // RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
 // RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \
 // RUN:   --cuda-device-only -nogpuinc -nogpulib \
-// RUN:   -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
+// RUN:   -mforce-unsafe-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s
 // WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 64
 // WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE__ 32
 // WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64
diff --git a/clang/test/Driver/hip-toolchain-features.hip b/clang/test/Driver/hip-toolchain-features.hip
index dbc007ac1335b..1de21d9a0ff06 100644
--- a/clang/test/Driver/hip-toolchain-features.hip
+++ b/clang/test/Driver/hip-toolchain-features.hip
@@ -67,9 +67,15 @@
 // DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64"
 // DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode"
 
-// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN: not %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
 // RUN:   -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
 // RUN:   -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=WAVE64-ERR
+// WAVE64-ERR: error: '-mwavefrontsize64' is not fully supported for HIP on AMD GPU architecture gfx10 and above. Use '-mforce-unsafe-wavefrontsize64' to override
+
+// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -nogpulib \
+// RUN:   -nogpuinc --offload-arch=gfx1010 --no-offload-new-driver %s \
+// RUN:   -mno-wavefrontsize64 -mforce-unsafe-wavefrontsize64 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=WAVE64
 // WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64"
 // WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16"

``````````

</details>


https://github.com/llvm/llvm-project/pull/140185