[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)

Karl-Johan Karlsson via cfe-commits cfe-commits at lists.llvm.org
Thu May 2 02:46:55 PDT 2024


https://github.com/karka228 updated https://github.com/llvm/llvm-project/pull/90377

>From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Sat, 27 Apr 2024 22:04:11 +0200
Subject: [PATCH 1/4] Pre-commit reproducer for fast-math flags on calls

Adding test case related to issue #87758

The testcase shows the faulty behavior where the calls to llvm.pow.f32
and llvm.fma.f32 are not attributed with the "fast" flag.
---
 clang/test/CodeGen/pr87758.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 clang/test/CodeGen/pr87758.c

diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
new file mode 100644
index 00000000000000..5c5a6de442c2ac
--- /dev/null
+++ b/clang/test/CodeGen/pr87758.c
@@ -0,0 +1,29 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK %s
+
+// FIXME: Reproducer for issue #87758
+// The testcase below show the faulty behavior where the calls to
+// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag.
+
+float sqrtf(float x);
+float powf(float x, float y);
+float fmaf(float x, float y, float z);
+
+#pragma float_control(push)
+#pragma float_control(precise, off)
+// CHECK-LABEL: define dso_local float @fp_precise_libm_calls(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
+// CHECK-NEXT:    ret float [[TMP2]]
+//
+float fp_precise_libm_calls(float a, float b, float c) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  return a;
+}
+#pragma float_control(pop)

>From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Sat, 27 Apr 2024 22:37:34 +0200
Subject: [PATCH 2/4] [clang][CodeGen] Propagate pragma set fast-math flags to
 floating point builtins

This is a fix for issue #87758, where fast-math flags are not propagated
to all builtins.

It seems that pragmas setting fast-math flags were only propagated to calls
of unary floating point builtins. This patch also propagates them to binary
and ternary floating point builtins.
---
 clang/lib/CodeGen/CGBuiltin.cpp | 6 +++---
 clang/test/CodeGen/pr87758.c    | 9 ++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d08ab539148914..d61a63ac61572d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
   } else {
@@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                        {Src0->getType(), Src1->getType()});
     return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
@@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
   } else {
diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
index 5c5a6de442c2ac..05b3232986e0a6 100644
--- a/clang/test/CodeGen/pr87758.c
+++ b/clang/test/CodeGen/pr87758.c
@@ -2,9 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \
 // RUN: | FileCheck -check-prefix=CHECK %s
 
-// FIXME: Reproducer for issue #87758
-// The testcase below show the faulty behavior where the calls to
-// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag.
+// Reproducer for issue #87758
+// The testcase below verifies that the "fast" flag are set on the calls.
 
 float sqrtf(float x);
 float powf(float x, float y);
@@ -16,8 +15,8 @@ float fmaf(float x, float y, float z);
 // CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
 // CHECK-NEXT:    ret float [[TMP2]]
 //
 float fp_precise_libm_calls(float a, float b, float c) {

>From e1b01ae9bab3dcc25a462a32a39041ecb20f8f24 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Tue, 30 Apr 2024 07:56:26 +0200
Subject: [PATCH 3/4] [clang][CodeGen] Propagate pragma set fast-math flags to
 floating point builtins

Minor fix in emitLibraryCall() and updated testcase.
---
 clang/lib/CodeGen/CGBuiltin.cpp |   1 +
 clang/test/CodeGen/math-errno.c |   6 +-
 clang/test/CodeGen/pr87758.c    | 116 +++++++++++++++++++++++++++++---
 3 files changed, 112 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d61a63ac61572d..23c10e5e052c3c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -704,6 +704,7 @@ static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
 
 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
                               const CallExpr *E, llvm::Constant *calleeValue) {
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
   return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
 }
diff --git a/clang/test/CodeGen/math-errno.c b/clang/test/CodeGen/math-errno.c
index b5354e47e26b77..15340a11150c1a 100644
--- a/clang/test/CodeGen/math-errno.c
+++ b/clang/test/CodeGen/math-errno.c
@@ -27,7 +27,7 @@ float f1(float x) {
 // CHECK: tail call float @sqrtf(float noundef {{.*}}) #[[ATTR4_O2:[0-9]+]]
 
 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f1
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR3_FAST:[0-9]+]]
 
 // NOOPT-LABEL: define {{.*}} float @f1
 // NOOPT: call float @sqrtf(float noundef {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
@@ -44,7 +44,7 @@ float f2(float x) {
 // FAST: call fast float @llvm.sqrt.f32(float {{.*}})
 
 // NOOPT-LABEL: define {{.*}} float @f2
-// NOOPT: call float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
+// NOOPT: call fast float @sqrtf(float {{.*}}) #[[ATTR4_NOOPT:[0-9]+]]
 
 __attribute__((optnone))
 float f3(float x) {
@@ -56,7 +56,7 @@ float f3(float x) {
 // CHECK: call float @sqrtf(float noundef {{.*}})
 
 // FAST-LABEL: define {{.*}} nofpclass(nan inf) float @f3
-// FAST: call fast nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]
+// FAST: call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) {{.*}}) #[[ATTR4_FAST:[0-9]+]]
 
 // NOOPT-LABEL: define {{.*}} float @f3
 // NOOPT:  call float @sqrtf(float noundef %0) #[[ATTR4_NOOPT:[0-9]+]]
diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
index 05b3232986e0a6..e188ce617a0bf5 100644
--- a/clang/test/CodeGen/pr87758.c
+++ b/clang/test/CodeGen/pr87758.c
@@ -1,6 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s \
 // RUN: | FileCheck -check-prefix=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK-FAST %s
 
 // Reproducer for issue #87758
 // The testcase below verifies that the "fast" flag are set on the calls.
@@ -11,15 +13,113 @@ float fmaf(float x, float y, float z);
 
 #pragma float_control(push)
 #pragma float_control(precise, off)
-// CHECK-LABEL: define dso_local float @fp_precise_libm_calls(
-// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-LABEL: define dso_local float @fp_precise_off_libm_calls(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
-// CHECK-NEXT:    ret float [[TMP2]]
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP0]])
+// CHECK-NEXT:    store float [[TMP1]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.pow.f32(float [[TMP2]], float [[TMP3]])
+// CHECK-NEXT:    store float [[TMP4]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+// CHECK-NEXT:    store float [[TMP8]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    ret float [[TMP9]]
 //
-float fp_precise_libm_calls(float a, float b, float c) {
+// CHECK-FAST-LABEL: define dso_local nofpclass(nan inf) float @fp_precise_off_libm_calls(
+// CHECK-FAST-SAME: float noundef nofpclass(nan inf) [[A:%.*]], float noundef nofpclass(nan inf) [[B:%.*]], float noundef nofpclass(nan inf) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-FAST-NEXT:  entry:
+// CHECK-FAST-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP0]])
+// CHECK-FAST-NEXT:    store float [[TMP1]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP2:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP3:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP4:%.*]] = call fast float @llvm.pow.f32(float [[TMP2]], float [[TMP3]])
+// CHECK-FAST-NEXT:    store float [[TMP4]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP5:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP6:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP7:%.*]] = load float, ptr [[C_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP8:%.*]] = call fast float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+// CHECK-FAST-NEXT:    store float [[TMP8]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP9:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    ret float [[TMP9]]
+//
+float fp_precise_off_libm_calls(float a, float b, float c) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  return a;
+}
+#pragma float_control(pop)
+
+#pragma float_control(push)
+#pragma float_control(precise, on)
+// CHECK-LABEL: define dso_local float @fp_precise_on_libm_calls(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call float @sqrtf(float noundef [[TMP0]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    store float [[CALL]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[CALL1:%.*]] = call float @powf(float noundef [[TMP1]], float noundef [[TMP2]]) #[[ATTR3]]
+// CHECK-NEXT:    store float [[CALL1]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[TMP4]], float [[TMP5]])
+// CHECK-NEXT:    store float [[TMP6]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    ret float [[TMP7]]
+//
+// CHECK-FAST-LABEL: define dso_local nofpclass(nan inf) float @fp_precise_on_libm_calls(
+// CHECK-FAST-SAME: float noundef nofpclass(nan inf) [[A:%.*]], float noundef nofpclass(nan inf) [[B:%.*]], float noundef nofpclass(nan inf) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-FAST-NEXT:  entry:
+// CHECK-FAST-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
+// CHECK-FAST-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[CALL:%.*]] = call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) [[TMP0]]) #[[ATTR4:[0-9]+]]
+// CHECK-FAST-NEXT:    store float [[CALL]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP2:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[CALL1:%.*]] = call nofpclass(nan inf) float @powf(float noundef nofpclass(nan inf) [[TMP1]], float noundef nofpclass(nan inf) [[TMP2]]) #[[ATTR4]]
+// CHECK-FAST-NEXT:    store float [[CALL1]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP3:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP4:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP5:%.*]] = load float, ptr [[C_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[TMP4]], float [[TMP5]])
+// CHECK-FAST-NEXT:    store float [[TMP6]], ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    [[TMP7:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-FAST-NEXT:    ret float [[TMP7]]
+//
+float fp_precise_on_libm_calls(float a, float b, float c) {
   a = sqrtf(a);
   a = powf(a,b);
   a = fmaf(a,b,c);

>From 251009f4acd3a23d01b55c78d234a3df0e04e464 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Thu, 2 May 2024 11:35:27 +0200
Subject: [PATCH 4/4] [clang][CodeGen] Propagate pragma set fast-math flags to
 floating point builtins

Updated testcase.
---
 clang/lib/CodeGen/CGBuiltin.cpp |   2 +-
 clang/test/CodeGen/pr87758.c    | 139 ++++++++------------------------
 2 files changed, 34 insertions(+), 107 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 23c10e5e052c3c..6f488fc8c873e6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2661,7 +2661,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     if (OP.hasMathErrnoOverride())
       ErrnoOverriden = OP.getMathErrnoOverride();
   }
-  // True if 'atttibute__((optnone)) is used. This attibute overrides
+  // True if '__attribute__((optnone))' is used. This attribute overrides
   // fast-math which implies math-errno.
   bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();
 
diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
index e188ce617a0bf5..45d8fb2de2c954 100644
--- a/clang/test/CodeGen/pr87758.c
+++ b/clang/test/CodeGen/pr87758.c
@@ -1,128 +1,55 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s \
-// RUN: | FileCheck -check-prefix=CHECK %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s \
-// RUN: | FileCheck -check-prefix=CHECK-FAST %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -fmath-errno -ffp-contract=on -fno-rounding-math -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang -O3 -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang -O3 -ffp-model=fast -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang -O3 -ffp-model=precise -S -emit-llvm -Xclang -disable-llvm-passes %s -o - | FileCheck %s
 
 // Reproducer for issue #87758
 // The testcase below verifies that the "fast" flag are set on the calls.
 
-float sqrtf(float x);
-float powf(float x, float y);
-float fmaf(float x, float y, float z);
+float sqrtf(float x); // unary fp builtin
+float powf(float x, float y); // binary fp builtin
+float fmaf(float x, float y, float z); // ternary fp builtin
+char *rindex(const char *s, int c); // not a fp builtin
 
 #pragma float_control(push)
 #pragma float_control(precise, off)
-// CHECK-LABEL: define dso_local float @fp_precise_off_libm_calls(
-// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP0]])
-// CHECK-NEXT:    store float [[TMP1]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.pow.f32(float [[TMP2]], float [[TMP3]])
-// CHECK-NEXT:    store float [[TMP4]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[C_ADDR]], align 4
-// CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
-// CHECK-NEXT:    store float [[TMP8]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP9:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    ret float [[TMP9]]
-//
-// CHECK-FAST-LABEL: define dso_local nofpclass(nan inf) float @fp_precise_off_libm_calls(
-// CHECK-FAST-SAME: float noundef nofpclass(nan inf) [[A:%.*]], float noundef nofpclass(nan inf) [[B:%.*]], float noundef nofpclass(nan inf) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-FAST-NEXT:  entry:
-// CHECK-FAST-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP1:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP0]])
-// CHECK-FAST-NEXT:    store float [[TMP1]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP2:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP3:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP4:%.*]] = call fast float @llvm.pow.f32(float [[TMP2]], float [[TMP3]])
-// CHECK-FAST-NEXT:    store float [[TMP4]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP5:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP6:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP7:%.*]] = load float, ptr [[C_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP8:%.*]] = call fast float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
-// CHECK-FAST-NEXT:    store float [[TMP8]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP9:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    ret float [[TMP9]]
-//
-float fp_precise_off_libm_calls(float a, float b, float c) {
+// CHECK-LABEL: define
+// CHECK-SAME:  float @fp_precise_off_libm_calls(
+// CHECK: %{{.*}} = call fast float @llvm.sqrt.f32(
+// CHECK: %{{.*}} = call fast float @llvm.pow.f32(
+// CHECK: %{{.*}} = call fast float @llvm.fma.f32(
+// CHECK: %{{.*}} = call ptr @rindex(
+
+float fp_precise_off_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
   a = sqrtf(a);
   a = powf(a,b);
   a = fmaf(a,b,c);
+  e = rindex(d, 75);
   return a;
 }
 #pragma float_control(pop)
 
 #pragma float_control(push)
 #pragma float_control(precise, on)
-// CHECK-LABEL: define dso_local float @fp_precise_on_libm_calls(
-// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
-// CHECK-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[CALL:%.*]] = call float @sqrtf(float noundef [[TMP0]]) #[[ATTR3:[0-9]+]]
-// CHECK-NEXT:    store float [[CALL]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[CALL1:%.*]] = call float @powf(float noundef [[TMP1]], float noundef [[TMP2]]) #[[ATTR3]]
-// CHECK-NEXT:    store float [[CALL1]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[C_ADDR]], align 4
-// CHECK-NEXT:    [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[TMP4]], float [[TMP5]])
-// CHECK-NEXT:    store float [[TMP6]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    ret float [[TMP7]]
-//
-// CHECK-FAST-LABEL: define dso_local nofpclass(nan inf) float @fp_precise_on_libm_calls(
-// CHECK-FAST-SAME: float noundef nofpclass(nan inf) [[A:%.*]], float noundef nofpclass(nan inf) [[B:%.*]], float noundef nofpclass(nan inf) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
-// CHECK-FAST-NEXT:  entry:
-// CHECK-FAST-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    [[C_ADDR:%.*]] = alloca float, align 4
-// CHECK-FAST-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    store float [[C]], ptr [[C_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[CALL:%.*]] = call nofpclass(nan inf) float @sqrtf(float noundef nofpclass(nan inf) [[TMP0]]) #[[ATTR4:[0-9]+]]
-// CHECK-FAST-NEXT:    store float [[CALL]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP2:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[CALL1:%.*]] = call nofpclass(nan inf) float @powf(float noundef nofpclass(nan inf) [[TMP1]], float noundef nofpclass(nan inf) [[TMP2]]) #[[ATTR4]]
-// CHECK-FAST-NEXT:    store float [[CALL1]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP3:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP4:%.*]] = load float, ptr [[B_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP5:%.*]] = load float, ptr [[C_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[TMP4]], float [[TMP5]])
-// CHECK-FAST-NEXT:    store float [[TMP6]], ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    [[TMP7:%.*]] = load float, ptr [[A_ADDR]], align 4
-// CHECK-FAST-NEXT:    ret float [[TMP7]]
-//
-float fp_precise_on_libm_calls(float a, float b, float c) {
+// CHECK-LABEL: define
+// CHECK-SAME:  float @fp_precise_on_libm_calls(
+// CHECK: %{{.*}} = call
+// CHECK-NOT: fast
+// CHECK-SAME: float @sqrtf(
+// CHECK: %{{.*}} = call
+// CHECK-NOT: fast
+// CHECK-SAME: float @powf(
+// CHECK: %{{.*}} = call float @llvm.fma.f32(
+// CHECK: %{{.*}} = call ptr @rindex(
+
+float fp_precise_on_libm_calls(float a, float b, float c, const char *d, char *e, unsigned char f) {
   a = sqrtf(a);
   a = powf(a,b);
   a = fmaf(a,b,c);
+  e = rindex(d, 75);
   return a;
 }
 #pragma float_control(pop)



More information about the cfe-commits mailing list