[clang] cb015b9 - [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (#90377)

Sat May 4 08:47:52 PDT 2024

Author: Karl-Johan Karlsson
Date: 2024-05-04T17:47:48+02:00
New Revision: cb015b9ec9446b3a1303980c095fa442d5e46fbf

URL: https://github.com/llvm/llvm-project/commit/cb015b9ec9446b3a1303980c095fa442d5e46fbf
DIFF: https://github.com/llvm/llvm-project/commit/cb015b9ec9446b3a1303980c095fa442d5e46fbf.diff

LOG: [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (#90377)

This is a fix for the issue #87758 where fast-math flags are not
propagated all builtins.

It seems like pragmas with fast math flags was only propagated to calls
of unary floating point builtins. This patch propagate them also for
binary and ternary floating point builtins.

Added: 
    clang/test/CodeGen/pr87758.c

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/math-errno.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9d0e6627a52016..8e31652f4dabef 100644

--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
   } else {
@@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                        {Src0->getType(), Src1->getType()});
     return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
@@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
   } else {
@@ -704,6 +704,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
 
 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                               const CallExpr *E, llvm::Constant *calleeValue) {
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
 }
@@ -2660,7 +2661,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     if (OP.hasMathErrnoOverride())
       ErrnoOverriden = OP.getMathErrnoOverride();
   }
-  // True if 'atttibute__((optnone)) is used. This attibute overrides
+  // True if 'attribute__((optnone))' is used. This attribute overrides
   // fast-math which implies math-errno.
   bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
 

diff  --git a/clang/test/CodeGen/math-errno.c b/clang/test/CodeGen/math-errno.c
index b5354e47e26b77..15340a11150c1a 100644
--- a/clang/test/CodeGen/math-errno.c
+++ b/clang/test/CodeGen/math-errno.c
@@ -27,7 +27,7 @@ float f1(float x) {
 // CHECK: tail call float @sqrtf(float noundef {{.*}}) #[[ATTR4_O2:[0-9]+]]
 
 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f1
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]
 
 // NOOPT-LABEL: define {{.*}} float @f1
 // NOOPT: call float @sqrtf(float noundef {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
@@ -44,7 +44,7 @@ float f2(float x) {
 // FAST: call fast float @llvm.sqrt.f32(float {{.*}})
 
 // NOOPT-LABEL: define {{.*}} float @f2
-// NOOPT: call float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
+// NOOPT: call fast float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
 
 __attribute__((optnone))
 float f3(float x) {
@@ -56,7 +56,7 @@ float f3(float x) {
 // CHECK: call float @sqrtf(float noundef {{.*}})
 
 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f3
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]
 
 // NOOPT-LABEL: define {{.*}} float @f3
 // NOOPT:  call float @sqrtf(float noundef %0) #[[ATTR4_NOOPT:[0-9]+]]

diff  --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
new file mode 100644
index 00000000000000..1357449187ecc7
--- /dev/null
+++ b/clang/test/CodeGen/pr87758.c
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// precise mode
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fmath-errno -ffp-contract=on \
+// RUN: -fno-rounding-math -emit-llvm  -o - %s | FileCheck \
+// RUN: --check-prefix=CHECK-PRECISE %s
+
+// fast mode
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast \
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-FAST %s
+
+// Reproducer for issue #87758
+// The testcase below verifies that the "fast" flag are set on the calls.
+
+float sqrtf(float x); // unary fp builtin
+float powf(float x, float y); // binary fp builtin
+float fmaf(float x, float y, float z); // ternary fp builtin
+char *rindex(const char *s, int c); // not a fp builtin
+
+#pragma float_control(push)
+#pragma float_control(precise, off)
+// CHECK: define dso_local float @fp_precise_off_libm_calls(
+// CHECK: call fast float @llvm.sqrt.f32(
+// CHECK: call fast float @llvm.pow.f32(
+// CHECK: call fast float @llvm.fma.f32(
+// CHECK: call ptr @rindex(
+
+// CHECK-PRECISE: define dso_local float @fp_precise_off_libm_calls(
+// CHECK-PRECISE: call fast float @sqrtf(
+// CHECK-PRECISE: call fast float @powf(
+// CHECK-PRECISE: call fast float @llvm.fma.f32(
+// CHECK-PRECISE: call ptr @rindex(
+
+// CHECK-FAST: define dso_local nofpclass(nan inf) float @fp_precise_off_libm_calls(
+// CHECK-FAST: call fast float @llvm.sqrt.f32(
+// CHECK-FAST: call fast float @llvm.pow.f32(
+// CHECK-FAST: call fast float @llvm.fma.f32(
+// CHECK-FAST: call ptr @rindex(
+
+float fp_precise_off_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  e = rindex(d, 75);
+  return a;
+}
+#pragma float_control(pop)
+
+#pragma float_control(push)
+#pragma float_control(precise, on)
+// CHECK: define dso_local float @fp_precise_on_libm_calls(
+// CHECK: call float @sqrtf(
+// CHECK: call float @powf(
+// CHECK: call float @llvm.fma.f32(
+// CHECK: call ptr @rindex(
+
+// CHECK-PRECISE: define dso_local float @fp_precise_on_libm_calls(
+// CHECK-PRECISE: call float @sqrtf(
+// CHECK-PRECISE: call float @powf(
+// CHECK-PRECISE: call float @llvm.fma.f32(
+// CHECK-PRECISE: call ptr @rindex(
+
+// CHECK-FAST: define dso_local nofpclass(nan inf) float @fp_precise_on_libm_calls(
+// CHECK-FAST: call nofpclass(nan inf) float @sqrtf(
+// CHECK-FAST: call nofpclass(nan inf) float @powf(
+// CHECK-FAST: call float @llvm.fma.f32(
+// CHECK-FAST: call ptr @rindex(
+
+float fp_precise_on_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  e = rindex(d, 75);
+  return a;
+}
+#pragma float_control(pop)