[PATCH] D20341: [CUDA] Enable fusing FP ops for CUDA by default.

Tue May 17 15:44:14 PDT 2016

tra created this revision.
tra added reviewers: jlebar, jingyue.
tra added a subscriber: cfe-commits.

This matches the default nvcc behavior and gives a substantial performance boost on GPUs, where a fused multiply-add (fmad) is much cheaper than a separate mul followed by an add.



http://reviews.llvm.org/D20341

Files:
  lib/Frontend/CompilerInvocation.cpp
  test/CodeGenCUDA/fp-contract.cu

Index: test/CodeGenCUDA/fp-contract.cu
===================================================================

--- /dev/null
+++ test/CodeGenCUDA/fp-contract.cu
@@ -0,0 +1,34 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// By default we should fuse multiply/add into the llvm.fmuladd intrinsic.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
+
+// Explicit -ffp-contract=off should disable instruction fusing.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+
+// Explicit -ffp-contract=fast lets LLVM do the fusing, so no fusing in clang.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+
+#include "Inputs/cuda.h"
+
+__host__ __device__ float func(float a, float b, float c) { return a + b * c; }
+// ENABLED:       call float @llvm.fmuladd.f32
+// ENABLED-NEXT:  ret
+
+// DISABLED:      fmul float
+// DISABLED-NEXT: fadd float
+// DISABLED-NEXT: ret
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1486,6 +1486,9 @@
   Opts.CUDA = IK == IK_CUDA || IK == IK_PreprocessedCuda ||
               LangStd == LangStandard::lang_cuda;
 
+  if (Opts.CUDA)
+    Opts.DefaultFPContract = 1;
+
   // OpenCL and C++ both have bool, true, false keywords.
   Opts.Bool = Opts.OpenCL || Opts.CPlusPlus;
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D20341.57534.patch
Type: text/x-patch
Size: 2241 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20160517/98cc29b1/attachment.bin>