[llvm] [InstCombine] Make fptrunc combine use union of fast math flags (PR #118808)

Thu Dec 5 07:49:39 PST 2024

https://github.com/john-brawn-arm updated https://github.com/llvm/llvm-project/pull/118808

>From b5e85543608915cbaa2d778cf96bf74ff718967d Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Tue, 3 Dec 2024 17:48:50 +0000
Subject: [PATCH 1/2] [InstCombine] Make fptrunc combine use union of fast math
 flags

These combines involve swapping the fptrunc with its operand, and
using the union of fast math flags is the safest option as e.g. if we
have (fptrunc (fneg ninf x)) then (fneg ninf (fptrunc x)) will not be
correct as if x is a not within the range of the destination type the
result of (fptrunc x) will be inf.
---
 .../InstCombine/InstCombineCasts.cpp          |  9 +--
 llvm/test/Transforms/InstCombine/fpcast.ll    | 55 +++++++++++++++++++
 llvm/test/Transforms/InstCombine/fptrunc.ll   | 24 ++++++++
 3 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 7221c987b98219..a6c5507c764c76 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1847,15 +1847,16 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   Value *X;
   Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
   if (Op && Op->hasOneUse()) {
-    // FIXME: The FMF should propagate from the fptrunc, not the source op.
     IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+    FastMathFlags FMF = FPT.getFastMathFlags();
     if (isa<FPMathOperator>(Op))
-      Builder.setFastMathFlags(Op->getFastMathFlags());
+      FMF &= Op->getFastMathFlags();
+    Builder.setFastMathFlags(FMF);
 
     if (match(Op, m_FNeg(m_Value(X)))) {
       Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
-
-      return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
+      Value *Neg = Builder.CreateFNeg(InnerTrunc);
+      return replaceInstUsesWith(FPT, Neg);
     }
 
     // If we are truncating a select that has an extended operand, we can
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index 029e513ceafbcd..72bd42e60e0d27 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -29,6 +29,17 @@ define half @test3(float %a) {
   ret half %c
 }
 
+define half @test3_fast(float %a) {
+; CHECK-LABEL: @test3_fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT:    [[C:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
+; CHECK-NEXT:    ret half [[C]]
+;
+  %b = call float @llvm.fabs.f32(float %a)
+  %c = fptrunc fast float %b to half
+  ret half %c
+}
+
 define half @fneg_fptrunc(float %a) {
 ; CHECK-LABEL: @fneg_fptrunc(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
@@ -78,6 +89,28 @@ define half @test4-fast(float %a) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc fast float [[A:%.*]] to half
 ; CHECK-NEXT:    [[C:%.*]] = fneg fast half [[TMP1]]
 ; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fsub fast float -0.0, %a
+  %c = fptrunc fast float %b to half
+  ret half %c
+}
+
+define half @test4-mixed-fast-1(float %a) {
+; CHECK-LABEL: @test4-mixed-fast-1(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT:    [[C:%.*]] = fneg half [[TMP1]]
+; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fsub float -0.0, %a
+  %c = fptrunc fast float %b to half
+  ret half %c
+}
+
+define half @test4-mixed-fast-2(float %a) {
+; CHECK-LABEL: @test4-mixed-fast-2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT:    [[C:%.*]] = fneg half [[TMP1]]
+; CHECK-NEXT:    ret half [[C]]
 ;
   %b = fsub fast float -0.0, %a
   %c = fptrunc float %b to half
@@ -89,6 +122,28 @@ define half @test4_unary_fneg-fast(float %a) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc fast float [[A:%.*]] to half
 ; CHECK-NEXT:    [[C:%.*]] = fneg fast half [[TMP1]]
 ; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fneg fast float %a
+  %c = fptrunc fast float %b to half
+  ret half %c
+}
+
+define half @test4_unary_fneg-mixed-fast-1(float %a) {
+; CHECK-LABEL: @test4_unary_fneg-mixed-fast-1(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT:    [[C:%.*]] = fneg half [[TMP1]]
+; CHECK-NEXT:    ret half [[C]]
+;
+  %b = fneg float %a
+  %c = fptrunc fast float %b to half
+  ret half %c
+}
+
+define half @test4_unary_fneg-mixed-fast-2(float %a) {
+; CHECK-LABEL: @test4_unary_fneg-mixed-fast-2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc float [[A:%.*]] to half
+; CHECK-NEXT:    [[C:%.*]] = fneg half [[TMP1]]
+; CHECK-NEXT:    ret half [[C]]
 ;
   %b = fneg fast float %a
   %c = fptrunc float %b to half
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index a4296a326c4bc6..0b5d8b3cd06e07 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -61,6 +61,18 @@ define float @fptrunc_select_true_val(float %x, double %y, i1 %cond) {
   ret float %r
 }
 
+define float @fptrunc_fast_select_true_val(float %x, double %y, i1 %cond) {
+; CHECK-LABEL: @fptrunc_fast_select_true_val(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc fast double [[Y:%.*]] to float
+; CHECK-NEXT:    [[NARROW_SEL:%.*]] = select i1 [[COND:%.*]], float [[TMP1]], float [[X:%.*]]
+; CHECK-NEXT:    ret float [[NARROW_SEL]]
+;
+  %e = fpext float %x to double
+  %sel = select fast i1 %cond, double %y, double %e
+  %r = fptrunc fast double %sel to float
+  ret float %r
+}
+
 define <2 x float> @fptrunc_select_false_val(<2 x float> %x, <2 x double> %y, <2 x i1> %cond) {
 ; CHECK-LABEL: @fptrunc_select_false_val(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc <2 x double> [[Y:%.*]] to <2 x float>
@@ -73,6 +85,18 @@ define <2 x float> @fptrunc_select_false_val(<2 x float> %x, <2 x double> %y, <2
   ret <2 x float> %r
 }
 
+define <2 x float> @fptrunc_nnan_select_false_val(<2 x float> %x, <2 x double> %y, <2 x i1> %cond) {
+; CHECK-LABEL: @fptrunc_nnan_select_false_val(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc nnan <2 x double> [[Y:%.*]] to <2 x float>
+; CHECK-NEXT:    [[NARROW_SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> [[X:%.*]], <2 x float> [[TMP1]]
+; CHECK-NEXT:    ret <2 x float> [[NARROW_SEL]]
+;
+  %e = fpext <2 x float> %x to <2 x double>
+  %sel = select nnan <2 x i1> %cond, <2 x double> %e, <2 x double> %y
+  %r = fptrunc nnan <2 x double> %sel to <2 x float>
+  ret <2 x float> %r
+}
+
 declare void @use(float)
 
 define half @fptrunc_select_true_val_extra_use(half %x, float %y, i1 %cond) {

>From 67587867977f0e263b65011aa9e9313f29235a1b Mon Sep 17 00:00:00 2001
From: John Brawn <john.brawn at arm.com>
Date: Thu, 5 Dec 2024 15:27:42 +0000
Subject: [PATCH 2/2] Use dyn_cast, add test.

---
 .../InstCombine/InstCombineCasts.cpp          |  4 +--
 llvm/test/Transforms/InstCombine/fpcast.ll    | 33 +++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index a6c5507c764c76..62853ca0f72b0a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1849,8 +1849,8 @@ Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
   if (Op && Op->hasOneUse()) {
     IRBuilder<>::FastMathFlagGuard FMFG(Builder);
     FastMathFlags FMF = FPT.getFastMathFlags();
-    if (isa<FPMathOperator>(Op))
-      FMF &= Op->getFastMathFlags();
+    if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
+      FMF &= FPMO->getFastMathFlags();
     Builder.setFastMathFlags(FMF);
 
     if (match(Op, m_FNeg(m_Value(X)))) {
diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll
index 72bd42e60e0d27..d5290b572aefd9 100644
--- a/llvm/test/Transforms/InstCombine/fpcast.ll
+++ b/llvm/test/Transforms/InstCombine/fpcast.ll
@@ -150,6 +150,39 @@ define half @test4_unary_fneg-mixed-fast-2(float %a) {
   ret half %c
 }
 
+define <2 x half> @test4_unary_fneg-vec-fast(<2 x float> %a) {
+; CHECK-LABEL: @test4_unary_fneg-vec-fast(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc fast <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT:    [[C:%.*]] = fneg fast <2 x half> [[TMP1]]
+; CHECK-NEXT:    ret <2 x half> [[C]]
+;
+  %b = fneg fast <2 x float> %a
+  %c = fptrunc fast <2 x float> %b to <2 x half>
+  ret <2 x half> %c
+}
+
+define <2 x half> @test4_unary_fneg-vec-mixed-fast-1(<2 x float> %a) {
+; CHECK-LABEL: @test4_unary_fneg-vec-mixed-fast-1(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT:    [[C:%.*]] = fneg <2 x half> [[TMP1]]
+; CHECK-NEXT:    ret <2 x half> [[C]]
+;
+  %b = fneg <2 x float> %a
+  %c = fptrunc fast <2 x float> %b to <2 x half>
+  ret <2 x half> %c
+}
+
+define <2 x half> @test4_unary_fneg-vec-mixed-fast-2(<2 x float> %a) {
+; CHECK-LABEL: @test4_unary_fneg-vec-mixed-fast-2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half>
+; CHECK-NEXT:    [[C:%.*]] = fneg <2 x half> [[TMP1]]
+; CHECK-NEXT:    ret <2 x half> [[C]]
+;
+  %b = fneg fast <2 x float> %a
+  %c = fptrunc <2 x float> %b to <2 x half>
+  ret <2 x half> %c
+}
+
 define half @test5(float %a, float %b, float %c) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[D:%.*]] = fcmp ogt float [[A:%.*]], [[B:%.*]]