[clang] [clang][CodeGen] Propagate pragma set fast-math flags to floating point builtins (PR #90377)

Sat Apr 27 23:43:22 PDT 2024

https://github.com/karka228 created https://github.com/llvm/llvm-project/pull/90377

This is a fix for the issue #87758 where fast-math flags are not propagated all builtins.

It seems like pragmas with fast math flags was only propagated to calls of unary floating point builtins. This patch propagate them also for binary and ternary floating point builtins.


>From 80836f51573c8ba474a0b5d5890a489d5f0c7034 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Sat, 27 Apr 2024 22:04:11 +0200
Subject: [PATCH 1/2] Pre-commit reproducer for fast-math flags on calls

Adding test case related to issue #87758

The testcase show the faulty behavior where the calls to llvm.pow.f32
and llvm.fma.f32 are not attributed with the "fast" flag.
---
 clang/test/CodeGen/pr87758.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 clang/test/CodeGen/pr87758.c

diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
new file mode 100644
index 00000000000000..5c5a6de442c2ac
--- /dev/null
+++ b/clang/test/CodeGen/pr87758.c
@@ -0,0 +1,29 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \
+// RUN: | FileCheck -check-prefix=CHECK %s
+
+// FIXME: Reproducer for issue #87758
+// The testcase below show the faulty behavior where the calls to
+// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag.
+
+float sqrtf(float x);
+float powf(float x, float y);
+float fmaf(float x, float y, float z);
+
+#pragma float_control(push)
+#pragma float_control(precise, off)
+// CHECK-LABEL: define dso_local float @fp_precise_libm_calls(
+// CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
+// CHECK-NEXT:    ret float [[TMP2]]
+//
+float fp_precise_libm_calls(float a, float b, float c) {
+  a = sqrtf(a);
+  a = powf(a,b);
+  a = fmaf(a,b,c);
+  return a;
+}
+#pragma float_control(pop)

>From 4af840da9ec863f6372083f5e96998fb07db6b9c Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson <karl-johan.karlsson at ericsson.com>
Date: Sat, 27 Apr 2024 22:37:34 +0200
Subject: [PATCH 2/2] [clang][CodeGen] Propagate pragma set fast-math flags to
 floating point builtins

This is a fix for the issue #87758 where fast-math flags are not propagated
all builtins.

It seems like pragmas with fast math flags was only propagated to calls of
unary floating point builtins. This patch propagate them also for binary and
ternary floating point builtins.
---
 clang/lib/CodeGen/CGBuiltin.cpp | 6 +++---
 clang/test/CodeGen/pr87758.c    | 9 ++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d08ab539148914..d61a63ac61572d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -513,8 +513,8 @@ static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });
   } else {
@@ -530,8 +530,8 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin(
   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,
                                        {Src0->getType(), Src1->getType()});
     return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});
@@ -551,8 +551,8 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,
   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
 
+  CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
   if (CGF.Builder.getIsFPConstrained()) {
-    CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);
     Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());
     return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });
   } else {
diff --git a/clang/test/CodeGen/pr87758.c b/clang/test/CodeGen/pr87758.c
index 5c5a6de442c2ac..05b3232986e0a6 100644
--- a/clang/test/CodeGen/pr87758.c
+++ b/clang/test/CodeGen/pr87758.c
@@ -2,9 +2,8 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -O3 -emit-llvm -o - %s \
 // RUN: | FileCheck -check-prefix=CHECK %s
 
-// FIXME: Reproducer for issue #87758
-// The testcase below show the faulty behavior where the calls to
-// llvm.pow.f32 and llvm.fma.f32 are not attributed with the "fast" flag.
+// Reproducer for issue #87758
+// The testcase below verifies that the "fast" flag are set on the calls.
 
 float sqrtf(float x);
 float powf(float x, float y);
@@ -16,8 +15,8 @@ float fmaf(float x, float y, float z);
 // CHECK-SAME: float noundef [[A:%.*]], float noundef [[B:%.*]], float noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[A]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.pow.f32(float [[TMP0]], float [[B]])
-// CHECK-NEXT:    [[TMP2:%.*]] = tail call float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.pow.f32(float [[TMP0]], float [[B]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.fma.f32(float [[TMP1]], float [[B]], float [[C]])
 // CHECK-NEXT:    ret float [[TMP2]]
 //
 float fp_precise_libm_calls(float a, float b, float c) {