[PATCH] D31806: [SimplifyLibCalls] Fix infinite loop with fast-math optimization.

Mon Apr 10 11:05:15 PDT 2017

andrewng created this revision.

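One of the fast-math optimizations is to replace calls to standard double
functions with their float equivalents, e.g. exp -> expf. However, this can
cause an infinite loop when the call being replaced is inside the float
equivalent itself, because that function then ends up calling itself, e.g.: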

  float expf(float val) { return (float) exp((double) val); }

So this fix checks that the function containing the call to the standard
double function is not itself the float equivalent (same name with an 'f'
suffix, taking a single float and returning float) before replacing the call.



Index: test/Transforms/Util/libcalls-fast-math-inf-loop.ll
--- /dev/null
+++ test/Transforms/Util/libcalls-fast-math-inf-loop.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -O2 -o - %s | FileCheck %s
+; Test that -O2 -ffast-math lib call simplification of a double math function
+; to its float equivalent doesn't occur when the calling function is itself that
+; float equivalent. Otherwise the function would call itself in an infinite loop.
+; Test case C source:
+;   extern double exp(double x);
+;   inline float expf(float x) { return (float) exp((double) x); }
+;   float fn(float f) { return expf(f); }
+; IR generated with command:
+;   clang -cc1 -O2 -ffast-math -emit-llvm -disable-llvm-passes -triple x86_64-unknown-unknown -o - <srcfile>
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+; Function Attrs: nounwind
+define float @fn(float %f) #0 {
+; CHECK: define float @fn
+; CHECK-NOT: br label
+; CHECK: {{call.*}} float @exp
+  %f.addr = alloca float, align 4
+  store float %f, float* %f.addr, align 4, !tbaa !1
+  %1 = load float, float* %f.addr, align 4, !tbaa !1
+  %call = call fast float @expf(float %1) #3
+  ret float %call
+; Function Attrs: inlinehint nounwind readnone
+define available_externally float @expf(float %x) #1 {
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4, !tbaa !1
+  %1 = load float, float* %x.addr, align 4, !tbaa !1
+  %conv = fpext float %1 to double
+  %call = call fast double @exp(double %conv) #3
+  %conv1 = fptrunc double %call to float
+  ret float %conv1
+; Function Attrs: nounwind readnone
+declare double @exp(double) #2
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inlinehint nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+!llvm.ident = !{!0}
+!0 = !{!"clang version 5.0.0"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -930,6 +930,23 @@
   if (V == nullptr)
     return nullptr;
+  // If call isn't an intrinsic, check that it isn't within a function with the
+  // same name and signature as the float version of this call!
+  // e.g. float floorf(float val) { return (float) floor((double) val); }
+  if (!Callee->isIntrinsic()) {
+    const Function *F = CI->getFunction();
+    StringRef FName = F->getName();
+    StringRef CalleeName = Callee->getName();
+    if ((FName.size() == (CalleeName.size() + 1)) &&
+        (FName.back() == 'f') &&
+        FName.startswith(CalleeName)) {
+      const FunctionType *FT = F->getFunctionType();
+      if (FT->getReturnType()->isFloatTy() && (FT->getNumParams() == 1) &&
+          FT->getParamType(0)->isFloatTy())
+        return nullptr;
+    }
+  }
   // Propagate fast-math flags from the existing call to the new call.
   IRBuilder<>::FastMathFlagGuard Guard(B);

