[PATCH] D32084: AMDGPU/GFX9: Set +fast-fmaf for >=gfx900 unless -cl-denorms-are-zero is set

Konstantin Zhuravlyov via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 14 07:54:54 PDT 2017


kzhuravl created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng.

https://reviews.llvm.org/D32084

Files:
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/gfx9-fast-fmaf.cl


Index: test/CodeGenOpenCL/gfx9-fast-fmaf.cl
===================================================================
--- test/CodeGenOpenCL/gfx9-fast-fmaf.cl
+++ test/CodeGenOpenCL/gfx9-fast-fmaf.cl
@@ -0,0 +1,13 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck --check-prefix=DEFAULT %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature +fast-fmaf %s | FileCheck --check-prefix=FEATURE_FAST_FMAF_ON %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -target-feature -fast-fmaf %s | FileCheck --check-prefix=FEATURE_FAST_FMAF_OFF %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - -cl-denorms-are-zero %s | FileCheck --check-prefix=OPT_DENORMS_ARE_ZERO %s
+
+// DEFAULT: +fast-fmaf
+// FEATURE_FAST_FMAF_ON: +fast-fmaf
+// FEATURE_FAST_FMAF_OFF: -fast-fmaf
+// OPT_DENORMS_ARE_ZERO: -fast-fmaf
+
+kernel void gfx9_fast_fmaf() {}
\ No newline at end of file
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -2201,10 +2201,14 @@
       if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
         hasFP64Denormals = true;
     }
-    if (!hasFP32Denormals)
+    if (!hasFP32Denormals) {
       TargetOpts.Features.push_back(
           (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
           !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
+      TargetOpts.Features.push_back(
+          (Twine(hasFullSpeedFMAF32(TargetOpts.CPU) &&
+          !CGOpts.FlushDenorm ? '+' : '-') + Twine("fast-fmaf")).str());
+    }
     // Always do not flush fp64 or fp16 denorms.
     if (!hasFP64Denormals && hasFP64)
       TargetOpts.Features.push_back("+fp64-fp16-denormals");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32084.95298.patch
Type: text/x-patch
Size: 1962 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170414/131dd2b9/attachment.bin>


More information about the cfe-commits mailing list