[clang] c9d65a4 - HIP: Ensure new denormal mode attributes are set

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 31 15:00:47 PDT 2020


Author: Matt Arsenault
Date: 2020-03-31T18:00:37-04:00
New Revision: c9d65a48af1d7bbfed7e785613cc9d9acf71821b

URL: https://github.com/llvm/llvm-project/commit/c9d65a48af1d7bbfed7e785613cc9d9acf71821b
DIFF: https://github.com/llvm/llvm-project/commit/c9d65a48af1d7bbfed7e785613cc9d9acf71821b.diff

LOG: HIP: Ensure new denormal mode attributes are set

Apparently HIPToolChain does not derive from AMDGPUToolChain, so the
new denormal mode attributes were not being applied for HIP. I'm not sure
why it is not a subclass. Just copy the implementation for now.

Added: 
    

Modified: 
    clang/lib/Driver/ToolChains/AMDGPU.cpp
    clang/lib/Driver/ToolChains/AMDGPU.h
    clang/lib/Driver/ToolChains/HIP.cpp
    clang/lib/Driver/ToolChains/HIP.h
    clang/test/Driver/cuda-flush-denormals-to-zero.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 06e4686ac2b9..e6a5af99b203 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -103,6 +103,19 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
   return DAL;
 }
 
+bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
+    llvm::AMDGPU::GPUKind Kind) {
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+
+  // Enable f32 denormals by default only on subtargets where both FMA and
+  // denormal handling are fast. NOTE(review): true here means "do not flush".
+  const bool DefaultDenormsAreZeroForTarget =
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
+
+  return DefaultDenormsAreZeroForTarget;
+}
+
 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
     const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
     const llvm::fltSemantics *FPType) const {
@@ -121,18 +134,10 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
 
-  // Default to enabling f32 denormals by default on subtargets where fma is
-  // fast with denormals
-
-  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
-  const bool DefaultDenormsAreZeroForTarget =
-    (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
-    (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
-
   // TODO: There are way too many flags that change this. Do we need to check
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
-             !DefaultDenormsAreZeroForTarget;
+             !getDefaultDenormsAreZeroForTarget(Kind);
   // Outputs are flushed to zero, preserving sign
   return DAZ ? llvm::DenormalMode::getPreserveSign() :
                llvm::DenormalMode::getIEEE();

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 78c40580b302..e7a873efb008 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -13,6 +13,8 @@
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "llvm/Support/TargetParser.h"
+
 #include <map>
 
 namespace clang {
@@ -67,6 +69,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
                              llvm::opt::ArgStringList &CC1Args,
                              Action::OffloadKind DeviceOffloadKind) const override;
 
+  /// Return true when the subtarget has fast f32 FMA and fast f32 denormals.
+  /// NOTE(review): despite the name, callers flush when this returns false.
+  static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
+
   llvm::DenormalMode getDefaultDenormalModeForType(
       const llvm::opt::ArgList &DriverArgs,
       Action::OffloadKind DeviceOffloadKind,

diff  --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp
index 157dca7e0c8d..0a9ec68e13e4 100644
--- a/clang/lib/Driver/ToolChains/HIP.cpp
+++ b/clang/lib/Driver/ToolChains/HIP.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "HIP.h"
+#include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
@@ -16,6 +17,7 @@
 #include "clang/Driver/Options.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TargetParser.h"
 
 using namespace clang::driver;
 using namespace clang::driver::toolchains;
@@ -272,6 +274,34 @@ HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
   getProgramPaths().push_back(getDriver().Dir);
 }
 
+// FIXME: Copy of AMDGPUToolChain::getDefaultDenormalModeForType; deduplicate.
+llvm::DenormalMode HIPToolChain::getDefaultDenormalModeForType(
+    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::fltSemantics *FPType) const {
+  // Only f32 is configurable; a null or non-f32 type keeps denormals (IEEE).
+  if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
+    return llvm::DenormalMode::getIEEE();
+
+  if (DeviceOffloadKind == Action::OFK_Cuda) {
+    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
+        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                           options::OPT_fno_cuda_flush_denormals_to_zero,
+                           false))
+      return llvm::DenormalMode::getPreserveSign();
+  }
+
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+    !AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(Kind);
+  // When DAZ, outputs are flushed to zero preserving sign; otherwise use IEEE.
+  return DAZ ? llvm::DenormalMode::getPreserveSign() :
+               llvm::DenormalMode::getIEEE();
+}
+
 void HIPToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,

diff  --git a/clang/lib/Driver/ToolChains/HIP.h b/clang/lib/Driver/ToolChains/HIP.h
index c4f944e458bf..32eb8704feb5 100644
--- a/clang/lib/Driver/ToolChains/HIP.h
+++ b/clang/lib/Driver/ToolChains/HIP.h
@@ -115,6 +115,11 @@ class LLVM_LIBRARY_VISIBILITY HIPToolChain : public ToolChain {
 
   unsigned GetDefaultDwarfVersion() const override { return 4; }
 
+  llvm::DenormalMode getDefaultDenormalModeForType(
+    const llvm::opt::ArgList &DriverArgs,
+    Action::OffloadKind DeviceOffloadKind,
+    const llvm::fltSemantics *FPType = nullptr) const override;
+
   const ToolChain &HostTC;
 
 protected:

diff  --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index 74f4bbc1585e..5b1046b0cb12 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -7,6 +7,16 @@
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
+// Test explicit argument.
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test the default changing with no argument based on the subtarget.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
 // CPUFTZ-NOT: -fdenormal-fp-math
 
 // FTZ-NOT: -fdenormal-fp-math-f32=


        


More information about the cfe-commits mailing list