[PATCH] Allow speculating llvm.sqrt, fma and fmuladd

Matt Arsenault Matthew.Arsenault at amd.com
Wed Jan 22 22:04:23 PST 2014


  Explicitly state in the documentation that llvm.fma and llvm.fmuladd do not set errno.

http://llvm-reviews.chandlerc.com/D2428

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D2428?vs=6150&id=6592#toc

Files:
  docs/LangRef.rst
  lib/Analysis/ValueTracking.cpp
  test/Transforms/SimplifyCFG/speculate-math.ll

Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -7485,7 +7485,7 @@
 """"""""""
 
 This function returns the same values as the libm ``fma`` functions
-would.
+would, and does not set errno.
 
 '``llvm.fabs.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -8293,7 +8293,8 @@
 not be performed between the multiplication and addition steps if the
 code generator fuses the operations. Fusion is not guaranteed, even if
 the target platform supports it. If a fused multiply-add is required the
-corresponding llvm.fma.\* intrinsic function should be used instead.
+corresponding llvm.fma.\* intrinsic function should be used
+instead. Like '``llvm.fma.*``', this intrinsic never sets errno.
 
 Examples:
 """""""""
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -2035,6 +2035,12 @@
        case Intrinsic::umul_with_overflow:
        case Intrinsic::usub_with_overflow:
          return true;
+       // sqrt, fma and fmuladd should be OK, since the llvm intrinsics aren't
+       // defined to set errno like their libm counterparts would.
+       case Intrinsic::sqrt:
+       case Intrinsic::fma:
+       case Intrinsic::fmuladd:
+         return true;
        // TODO: some fp intrinsics are marked as having the same error handling
        // as libm. They're safe to speculate when they won't error.
        // TODO: are convert_{from,to}_fp16 safe?
Index: test/Transforms/SimplifyCFG/speculate-math.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/speculate-math.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+
+; Each function below calls a math intrinsic on only one side of a branch and
+; merges its result with a NaN constant through a phi. Once the intrinsic is
+; treated as safe to speculate, simplifycfg is expected to hoist the call and
+; replace the branch + phi with a select. The hoisted call is still present in
+; the output, so the checks look for the select rather than for a missing call.
+
+declare float @llvm.sqrt.f32(float) nounwind readonly
+declare float @llvm.fma.f32(float, float, float) nounwind readonly
+declare float @llvm.fmuladd.f32(float, float, float) nounwind readonly
+
+; CHECK-LABEL: @sqrt_test(
+; CHECK: select
+define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_sqrt.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.sqrt.f32(float %a) nounwind readnone
+  br label %test_sqrt.exit
+
+test_sqrt.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; CHECK-LABEL: @fma_test(
+; CHECK: select
+define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fma.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+  br label %test_fma.exit
+
+test_fma.exit:                                    ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: @fmuladd_test(
+; CHECK: select
+define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fmuladd.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fmuladd.f32(float %a, float %b, float %c) nounwind readnone
+  br label %test_fmuladd.exit
+
+test_fmuladd.exit:                                ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2428.2.patch
Type: text/x-patch
Size: 4051 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140122/47764895/attachment.bin>


More information about the llvm-commits mailing list