[clang] dc89a3e - HIP: Fix handling of denormal mode

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Mon Apr 13 11:48:59 PDT 2020


Author: Matt Arsenault
Date: 2020-04-13T11:48:45-07:00
New Revision: dc89a3efb43feedec04facfa2206de011d2606e7

URL: https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7
DIFF: https://github.com/llvm/llvm-project/commit/dc89a3efb43feedec04facfa2206de011d2606e7.diff

LOG: HIP: Fix handling of denormal mode

I didn't realize HIP was a distinct offloading kind, so the subtarget
was looking for -march, which isn't correct for HIP. We also have the
possibility of different denormal defaults in the case of multiple
offload targets, so we need to thread the JobAction through the target
hook.

Added: 
    

Modified: 
    clang/include/clang/Driver/ToolChain.h
    clang/lib/Driver/ToolChains/AMDGPU.cpp
    clang/lib/Driver/ToolChains/AMDGPU.h
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/Cuda.cpp
    clang/lib/Driver/ToolChains/Cuda.h
    clang/lib/Driver/ToolChains/Linux.cpp
    clang/lib/Driver/ToolChains/Linux.h
    clang/lib/Driver/ToolChains/PS4CPU.h
    clang/test/Driver/cuda-flush-denormals-to-zero.cu

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 66f22d538138..fb3cbd7f84c8 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -636,8 +636,7 @@ class ToolChain {
   /// environment for the given \p FPType if given. Otherwise, the default
   /// assumed mode for any floating point type.
   virtual llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const {
     return llvm::DenormalMode::getIEEE();
   }

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index f09578f4769e..2a796f28403f 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -273,18 +273,22 @@ bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
 }
 
 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
     const llvm::fltSemantics *FPType) const {
   // Denormals should always be enabled for f16 and f64.
   if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
     return llvm::DenormalMode::getIEEE();
 
-  if (DeviceOffloadKind == Action::OFK_Cuda) {
+  if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
+      JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
+    auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                            options::OPT_fno_cuda_flush_denormals_to_zero,
-                           false))
+                           getDefaultDenormsAreZeroForTarget(Kind)))
       return llvm::DenormalMode::getPreserveSign();
+
+    return llvm::DenormalMode::getIEEE();
   }
 
   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
@@ -294,7 +298,9 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
              getDefaultDenormsAreZeroForTarget(Kind);
-  // Outputs are flushed to zero, preserving sign
+
+  // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
+  // also implicit treated as zero (DAZ).
   return DAZ ? llvm::DenormalMode::getPreserveSign() :
                llvm::DenormalMode::getIEEE();
 }

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 87a16272d624..afd71e1f595b 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -214,8 +214,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
   static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const override;
 };
 

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 5f9b6d813416..415ef27eee0a 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2510,7 +2510,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
 static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
                                        bool OFastEnabled, const ArgList &Args,
                                        ArgStringList &CmdArgs,
-                                       Action::OffloadKind DeviceOffloadKind) {
+                                       const JobAction &JA) {
   // Handle various floating point optimization flags, mapping them to the
   // appropriate LLVM code generation flags. This is complicated by several
   // "umbrella" flags, so we do this by stepping through the flags incrementally
@@ -2533,10 +2533,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
   // -ffp-exception-behavior options: strict, maytrap, ignore
   StringRef FPExceptionBehavior = "";
   const llvm::DenormalMode DefaultDenormalFPMath =
-      TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind);
+      TC.getDefaultDenormalModeForType(Args, JA);
   const llvm::DenormalMode DefaultDenormalFP32Math =
-    TC.getDefaultDenormalModeForType(Args, DeviceOffloadKind,
-                                     &llvm::APFloat::IEEEsingle());
+      TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle());
 
   llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
   llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
@@ -4295,7 +4294,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-mdisable-tail-calls");
 
     RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args,
-                               CmdArgs, JA.getOffloadingDeviceKind());
+                               CmdArgs, JA);
 
     // Render ABI arguments
     switch (TC.getArch()) {
@@ -4618,8 +4617,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.hasArg(options::OPT_fsplit_stack))
     CmdArgs.push_back("-split-stacks");
 
-  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs,
-                             JA.getOffloadingDeviceKind());
+  RenderFloatingPointOptions(TC, D, OFastEnabled, Args, CmdArgs, JA);
 
   if (Arg *A = Args.getLastArg(options::OPT_mdouble_EQ)) {
     if (TC.getArch() == llvm::Triple::avr)

diff  --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 51cd5a781d63..08064de13b5b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -721,9 +721,9 @@ void CudaToolChain::addClangTargetOptions(
 }
 
 llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
     const llvm::fltSemantics *FPType) const {
-  if (DeviceOffloadKind == Action::OFK_Cuda) {
+  if (JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
     if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
         DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
                            options::OPT_fno_cuda_flush_denormals_to_zero,
@@ -731,7 +731,7 @@ llvm::DenormalMode CudaToolChain::getDefaultDenormalModeForType(
       return llvm::DenormalMode::getPreserveSign();
   }
 
-  assert(DeviceOffloadKind != Action::OFK_Host);
+  assert(JA.getOffloadingDeviceKind() != Action::OFK_Host);
   return llvm::DenormalMode::getIEEE();
 }
 

diff  --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 3de98c11a215..bf462d702587 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -156,8 +156,7 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
                              Action::OffloadKind DeviceOffloadKind) const override;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-      const llvm::opt::ArgList &DriverArgs,
-      Action::OffloadKind DeviceOffloadKind,
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
       const llvm::fltSemantics *FPType = nullptr) const override;
 
   // Never try to use the integrated assembler with CUDA; always fork out to

diff  --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 788b089351d4..1455d3bfc543 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -988,10 +988,10 @@ void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args,
   ToolChain::addProfileRTLibs(Args, CmdArgs);
 }
 
-llvm::DenormalMode Linux::getDefaultDenormalModeForType(
-  const llvm::opt::ArgList &DriverArgs,
-  Action::OffloadKind DeviceOffloadKind,
-  const llvm::fltSemantics *FPType) const {
+llvm::DenormalMode
+Linux::getDefaultDenormalModeForType(const llvm::opt::ArgList &DriverArgs,
+                                     const JobAction &JA,
+                                     const llvm::fltSemantics *FPType) const {
   switch (getTriple().getArch()) {
   case llvm::Triple::x86:
   case llvm::Triple::x86_64: {

diff  --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h
index 999f991b6360..550cb96b0b9a 100644
--- a/clang/lib/Driver/ToolChains/Linux.h
+++ b/clang/lib/Driver/ToolChains/Linux.h
@@ -49,9 +49,8 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF {
   std::vector<std::string> ExtraOpts;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs,
-    Action::OffloadKind DeviceOffloadKind,
-    const llvm::fltSemantics *FPType = nullptr) const override;
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+      const llvm::fltSemantics *FPType = nullptr) const override;
 
 protected:
   Tool *buildAssembler() const override;

diff  --git a/clang/lib/Driver/ToolChains/PS4CPU.h b/clang/lib/Driver/ToolChains/PS4CPU.h
index 8fedb6eda0ef..27969bcf3726 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.h
+++ b/clang/lib/Driver/ToolChains/PS4CPU.h
@@ -94,9 +94,8 @@ class LLVM_LIBRARY_VISIBILITY PS4CPU : public Generic_ELF {
     Action::OffloadKind DeviceOffloadingKind) const override;
 
   llvm::DenormalMode getDefaultDenormalModeForType(
-    const llvm::opt::ArgList &DriverArgs,
-    Action::OffloadKind DeviceOffloadKind,
-    const llvm::fltSemantics *FPType) const override {
+      const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+      const llvm::fltSemantics *FPType) const override {
     // DAZ and FTZ are on by default.
     return llvm::DenormalMode::getPreserveSign();
   }

diff  --git a/clang/test/Driver/cuda-flush-denormals-to-zero.cu b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
index 5b1046b0cb12..a515b5f8ca07 100644
--- a/clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ b/clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -7,16 +7,28 @@
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test explicit argument.
+// Test explicit argument, with CUDA offload kind
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test explicit argument, with HIP offload kind
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test the default changing with no argument based on the subtarget.
+// Test the default changing with no argument based on the subtarget in HIP mode
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
+
+// Test multiple offload archs with 
diff erent defaults.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fcuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZX2 %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -fno-cuda-flush-denormals-to-zero --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+
 // CPUFTZ-NOT: -fdenormal-fp-math
 
 // FTZ-NOT: -fdenormal-fp-math-f32=
@@ -25,3 +37,13 @@
 // The default of ieee is omitted
 // NOFTZ-NOT: "-fdenormal-fp-math"
 // NOFTZ-NOT: "-fdenormal-fp-math-f32"
+
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+// MIXED-DEFAULT-MODE: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// MIXED-DEFAULT-MODE-SAME: "-target-cpu" "gfx803"
+// MIXED-DEFAULT-MODE-NOT: -denormal-fp-math
+
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx803"
+// FTZX2: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
+// FTZX2-SAME: "-target-cpu" "gfx900"


        


More information about the cfe-commits mailing list