[llvm] Update foldFMulReassoc to respect absent fast-math flags (PR #88589)

Tue Apr 16 14:11:22 PDT 2024

https://github.com/andykaylor updated https://github.com/llvm/llvm-project/pull/88589

>From 75997b0e42baa75c64637ed5fec8fbce1b0d822d Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor at intel.com>
Date: Fri, 12 Apr 2024 15:48:55 -0700
Subject: [PATCH 1/5] Update foldFMulReassoc to respect absent fast-math flags

This change updates a few of the transformations in foldFMulReassoc to
respect absent fast-math flags. When fmul and fdiv instructions were
being folded, the code did not check for fast-math flags on the fdiv
instruction, and it transferred flags to the folded instruction that
were not present on the original fdiv instruction.

This fixes https://github.com/llvm/llvm-project/issues/82857
---
 .../InstCombine/InstCombineMulDivRem.cpp      | 33 +++++++++---
 llvm/test/Transforms/InstCombine/fmul.ll      | 53 ++++++++++++++++---
 2 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 4dc1319f1c437f..96485c40484007 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -636,26 +636,43 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   // expression.
   if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
     Constant *C1;
-    if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
+    if (match(Op0,
+              m_AllowReassoc(m_OneUse(m_FDiv(m_Constant(C1), m_Value(X)))))) {
       // (C1 / X) * C --> (C * C1) / X
       Constant *CC1 =
           ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
-      if (CC1 && CC1->isNormalFP())
-        return BinaryOperator::CreateFDivFMF(CC1, X, &I);
+      if (CC1 && CC1->isNormalFP()) {
+        // Preserve only fast-math flags that were set on both of the original
+        // instructions
+        auto *NewDiv = BinaryOperator::CreateFDivFMF(CC1, X, &I);
+        NewDiv->andIRFlags(Op0);
+        return NewDiv;
+      }
     }
-    if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
+    if (match(Op0, m_AllowReassoc(m_FDiv(m_Value(X), m_Constant(C1))))) {
+      // FIXME: This seems like it should also be checking for arcp
       // (X / C1) * C --> X * (C / C1)
       Constant *CDivC1 =
           ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
-      if (CDivC1 && CDivC1->isNormalFP())
-        return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+      if (CDivC1 && CDivC1->isNormalFP()) {
+        // Preserve only fast-math flags that were set on both of the original
+        // instructions
+        auto *NewMul = BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
+        NewMul->andIRFlags(Op0);
+        return NewMul;
+      }
 
       // If the constant was a denormal, try reassociating differently.
       // (X / C1) * C --> X / (C1 / C)
       Constant *C1DivC =
           ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
-      if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
-        return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+      if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) {
+        // Preserve only fast-math flags that were set on both of the original
+        // instructions
+        auto *NewDiv = BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
+        NewDiv->andIRFlags(Op0);
+        return NewDiv;
+      }
     }
 
     // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index f6435f0032891e..b541f455939c7e 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -652,12 +652,49 @@ define float @fdiv_constant_numerator_fmul(float %x) {
 ; CHECK-LABEL: @fdiv_constant_numerator_fmul(
 ; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
 ; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fdiv reassoc float 2.0e+3, %x
+  %t3 = fmul reassoc float %t1, 6.0e+3
+  ret float %t3
+}
+
+; C1/X * C2 => (C1*C2) / X with mixed fast-math flags
+
+define float @fdiv_constant_numerator_fmul_mixed(float %x) {
+; CHECK-LABEL: @fdiv_constant_numerator_fmul_mixed(
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fdiv reassoc float 2.0e+3, %x
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+}
+
+; C1/X * C2 => (C1*C2) / X with full fast-math flags
+
+define float @fdiv_constant_numerator_fmul_fast(float %x) {
+; CHECK-LABEL: @fdiv_constant_numerator_fmul_fast(
+; CHECK-NEXT:    [[T3:%.*]] = fdiv fast float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT:    ret float [[T3]]
+;
+  %t1 = fdiv fast float 2.0e+3, %x
+  %t3 = fmul fast float %t1, 6.0e+3
+  ret float %t3
+}
+
+; C1/X * C2 => (C1*C2) / X with no fast-math flags on the fdiv
+
+define float @fdiv_constant_numerator_fmul_precdiv(float %x) {
+; CHECK-LABEL: @fdiv_constant_numerator_fmul_precdiv(
+; CHECK-NEXT:    [[T4:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT:    ret float [[T4]]
 ;
   %t1 = fdiv float 2.0e+3, %x
   %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }
 
+
 ; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
 
 @fmul2_external = external global float
@@ -679,7 +716,8 @@ define float @fdiv_constant_numerator_fmul_extra_use(float %x) {
 
 define float @fdiv_constant_denominator_fmul(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul(
-; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 6.000000e+03
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[TMP1]], 2.000000e+03
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fdiv float %x, 2.0e+3
@@ -692,7 +730,7 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[T3]]
 ;
-  %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
+  %t1 = fdiv reassoc <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
   %t3 = fmul reassoc <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
   ret <4 x float> %t3
 }
@@ -705,7 +743,7 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x)
 ; CHECK-NEXT:    ret <4 x float> [[T3]]
 ;
   %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
-  %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
+  %t1 = fdiv reassoc <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
   %t3 = fmul reassoc <4 x float> %t1, %constExprMul
   ret <4 x float> %t3
 }
@@ -745,7 +783,8 @@ define float @fdiv_constant_denominator_fmul_denorm(float %x) {
 
 define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder(
-; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 0x3810000000000000
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[TMP1]], 3.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
   %t1 = fdiv float %x, 3.0
@@ -868,7 +907,8 @@ define float @fmul_fadd_distribute_extra_use(float %x) {
 
 define double @fmul_fadd_fdiv_distribute2(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc double [[X:%.*]], 0x10000000000000
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[TMP2]], 3.000000e+00
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;
@@ -883,7 +923,8 @@ define double @fmul_fadd_fdiv_distribute2(double %x) {
 
 define double @fmul_fadd_fdiv_distribute3(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc double [[X:%.*]], 0x10000000000000
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[TMP2]], 3.000000e+00
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;

>From 61fed39944a1f2d13932275d244e365d3da7e346 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor at intel.com>
Date: Mon, 15 Apr 2024 12:02:47 -0700
Subject: [PATCH 2/5] Added a missed case that was transforming non-constant
 fdiv

---
 .../InstCombine/InstCombineMulDivRem.cpp      | 10 ++--
 llvm/test/Transforms/InstCombine/fast-math.ll |  4 +-
 llvm/test/Transforms/InstCombine/fmul-pow.ll  | 30 +++++------
 llvm/test/Transforms/InstCombine/fmul.ll      | 54 +++++++++----------
 4 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 96485c40484007..b7bcb839863954 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -698,10 +698,14 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
 
   Value *Z;
   if (match(&I,
-            m_c_FMul(m_OneUse(m_FDiv(m_Value(X), m_Value(Y))), m_Value(Z)))) {
+            m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
+                     m_Value(Z)))) {
     // Sink division: (X / Y) * Z --> (X * Z) / Y
-    Value *NewFMul = Builder.CreateFMulFMF(X, Z, &I);
-    return BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+    auto *NewFMul = cast<Instruction>(Builder.CreateFMulFMF(X, Z, &I));
+    NewFMul->andIRFlags(Op0);
+    auto *NewDiv = BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
+    NewDiv->andIRFlags(Op0);
+    return NewDiv;
   }
 
   // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 129d7811cfb867..916955e34efacb 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -562,7 +562,7 @@ define float @fdiv1(float %x) {
 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
 ; CHECK-NEXT:    ret float [[DIV1]]
 ;
-  %div = fdiv float %x, 0x3FF3333340000000
+  %div = fdiv fast float %x, 0x3FF3333340000000
   %div1 = fdiv fast float %div, 0x4002666660000000
   ret float %div1
 ; 0x3FF3333340000000 = 1.2f
@@ -603,7 +603,7 @@ define float @fdiv3(float %x) {
 ; CHECK-NEXT:    [[DIV1:%.*]] = fdiv fast float [[TMP1]], 0x47EFFFFFE0000000
 ; CHECK-NEXT:    ret float [[DIV1]]
 ;
-  %div = fdiv float %x, 0x47EFFFFFE0000000
+  %div = fdiv fast float %x, 0x47EFFFFFE0000000
   %div1 = fdiv fast float %div, 0x4002666660000000
   ret float %div1
 }
diff --git a/llvm/test/Transforms/InstCombine/fmul-pow.ll b/llvm/test/Transforms/InstCombine/fmul-pow.ll
index 63458e136074c9..84592d220d62c4 100644
--- a/llvm/test/Transforms/InstCombine/fmul-pow.ll
+++ b/llvm/test/Transforms/InstCombine/fmul-pow.ll
@@ -85,8 +85,8 @@ define double @pow_ab_recip_a_reassoc(double %a, double %b)  {
 ; CHECK-NEXT:    [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[M]]
 ;
-  %r = fdiv double 1.0, %a
-  %p = call double @llvm.pow.f64(double %a, double %b)
+  %r = fdiv reassoc double 1.0, %a
+  %p = call reassoc double @llvm.pow.f64(double %a, double %b)
   %m = fmul reassoc double %r, %p
   ret double %m
 }
@@ -99,8 +99,8 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b)  {
 ; CHECK-NEXT:    [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[M]]
 ;
-  %r = fdiv double 1.0, %a
-  %p = call double @llvm.pow.f64(double %a, double %b)
+  %r = fdiv reassoc double 1.0, %a
+  %p = call reassoc double @llvm.pow.f64(double %a, double %b)
   %m = fmul reassoc double %p, %r
   ret double %m
 }
@@ -109,14 +109,14 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b)  {
 
 define double @pow_ab_recip_a_reassoc_use1(double %a, double %b)  {
 ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use1(
-; CHECK-NEXT:    [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]]
-; CHECK-NEXT:    [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]]
+; CHECK-NEXT:    [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]])
 ; CHECK-NEXT:    [[M:%.*]] = fmul reassoc double [[R]], [[P]]
 ; CHECK-NEXT:    call void @use(double [[R]])
 ; CHECK-NEXT:    ret double [[M]]
 ;
-  %r = fdiv double 1.0, %a
-  %p = call double @llvm.pow.f64(double %a, double %b)
+  %r = fdiv reassoc double 1.0, %a
+  %p = call reassoc double @llvm.pow.f64(double %a, double %b)
   %m = fmul reassoc double %r, %p
   call void @use(double %r)
   ret double %m
@@ -126,13 +126,13 @@ define double @pow_ab_recip_a_reassoc_use1(double %a, double %b)  {
 
 define double @pow_ab_recip_a_reassoc_use2(double %a, double %b)  {
 ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use2(
-; CHECK-NEXT:    [[P:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
+; CHECK-NEXT:    [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]])
 ; CHECK-NEXT:    [[M:%.*]] = fdiv reassoc double [[P]], [[A]]
 ; CHECK-NEXT:    call void @use(double [[P]])
 ; CHECK-NEXT:    ret double [[M]]
 ;
-  %r = fdiv double 1.0, %a
-  %p = call double @llvm.pow.f64(double %a, double %b)
+  %r = fdiv reassoc double 1.0, %a
+  %p = call reassoc double @llvm.pow.f64(double %a, double %b)
   %m = fmul reassoc double %r, %p
   call void @use(double %p)
   ret double %m
@@ -142,15 +142,15 @@ define double @pow_ab_recip_a_reassoc_use2(double %a, double %b)  {
 
 define double @pow_ab_recip_a_reassoc_use3(double %a, double %b)  {
 ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use3(
-; CHECK-NEXT:    [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]]
-; CHECK-NEXT:    [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]]
+; CHECK-NEXT:    [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]])
 ; CHECK-NEXT:    [[M:%.*]] = fmul reassoc double [[R]], [[P]]
 ; CHECK-NEXT:    call void @use(double [[R]])
 ; CHECK-NEXT:    call void @use(double [[P]])
 ; CHECK-NEXT:    ret double [[M]]
 ;
-  %r = fdiv double 1.0, %a
-  %p = call double @llvm.pow.f64(double %a, double %b)
+  %r = fdiv reassoc double 1.0, %a
+  %p = call reassoc double @llvm.pow.f64(double %a, double %b)
   %m = fmul reassoc double %r, %p
   call void @use(double %r)
   call void @use(double %p)
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index b541f455939c7e..5c230425e78a41 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -633,15 +633,15 @@ define float @log2half(float %x, float %y) {
 
 define float @log2half_commute(float %x1, float %y) {
 ; CHECK-LABEL: @log2half_commute(
+; CHECK-NEXT:    [[X1:%.*]] = fmul fast float [[X2:%.*]], 0x3FC24924A0000000
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast float [[TMP2]], [[X1]]
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[TMP3]], 0x3FC24924A0000000
-; CHECK-NEXT:    ret float [[MUL]]
+; CHECK-NEXT:    ret float [[TMP3]]
 ;
-  %x = fdiv float %x1, 7.0 ; thwart complexity-based canonicalization
-  %halfy = fmul float %y, 0.5
-  %log2 = call float @llvm.log2.f32(float %halfy)
+  %x = fdiv fast float %x1, 7.0 ; thwart complexity-based canonicalization
+  %halfy = fmul fast float %y, 0.5
+  %log2 = call fast float @llvm.log2.f32(float %halfy)
   %mul = fmul fast float %x, %log2
   ret float %mul
 }
@@ -689,7 +689,7 @@ define float @fdiv_constant_numerator_fmul_precdiv(float %x) {
 ; CHECK-NEXT:    [[T4:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
 ; CHECK-NEXT:    ret float [[T4]]
 ;
-  %t1 = fdiv float 2.0e+3, %x
+  %t1 = fdiv reassoc float 2.0e+3, %x
   %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }
@@ -716,11 +716,10 @@ define float @fdiv_constant_numerator_fmul_extra_use(float %x) {
 
 define float @fdiv_constant_denominator_fmul(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 6.000000e+03
-; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[TMP1]], 2.000000e+03
+; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fdiv float %x, 2.0e+3
+  %t1 = fdiv reassoc float %x, 2.0e+3
   %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }
@@ -772,7 +771,7 @@ define float @fdiv_constant_denominator_fmul_denorm(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fmul fast float [[X:%.*]], 0x3760620000000000
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fdiv float %x, 2.0e+3
+  %t1 = fdiv fast float %x, 2.0e+3
   %t3 = fmul fast float %t1, 0x3810000000000000
   ret float %t3
 }
@@ -783,11 +782,10 @@ define float @fdiv_constant_denominator_fmul_denorm(float %x) {
 
 define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) {
 ; CHECK-LABEL: @fdiv_constant_denominator_fmul_denorm_try_harder(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 0x3810000000000000
-; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[TMP1]], 3.000000e+00
+; CHECK-NEXT:    [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fdiv float %x, 3.0
+  %t1 = fdiv reassoc float %x, 3.0
   %t3 = fmul reassoc float %t1, 0x3810000000000000
   ret float %t3
 }
@@ -907,13 +905,12 @@ define float @fmul_fadd_distribute_extra_use(float %x) {
 
 define double @fmul_fadd_fdiv_distribute2(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute2(
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc double [[X:%.*]], 0x10000000000000
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[TMP2]], 3.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;
-  %t1 = fdiv double %x, 3.0
-  %t2 = fadd double %t1, 5.0
+  %t1 = fdiv reassoc double %x, 3.0
+  %t2 = fadd reassoc double %t1, 5.0
   %t3 = fmul reassoc double %t2, 0x10000000000000
   ret double %t3
 }
@@ -923,13 +920,12 @@ define double @fmul_fadd_fdiv_distribute2(double %x) {
 
 define double @fmul_fadd_fdiv_distribute3(double %x) {
 ; CHECK-LABEL: @fmul_fadd_fdiv_distribute3(
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc double [[X:%.*]], 0x10000000000000
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[TMP2]], 3.000000e+00
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc double [[X:%.*]], 0x7FE8000000000000
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000
 ; CHECK-NEXT:    ret double [[T3]]
 ;
-  %t1 = fdiv double %x, 3.0
-  %t2 = fadd double %t1, 5.0
+  %t1 = fdiv reassoc double %x, 3.0
+  %t2 = fadd reassoc double %t1, 5.0
   %t3 = fmul reassoc double %t2, 0x10000000000000
   ret double %t3
 }
@@ -1027,8 +1023,8 @@ define double @fmul_fdivs_factor_common_denominator(double %x, double %y, double
 ; CHECK-NEXT:    [[MUL:%.*]] = fdiv fast double [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
-  %div1 = fdiv double %x, %z
-  %div2 = fdiv double %y, %z
+  %div1 = fdiv fast double %x, %z
+  %div2 = fdiv fast double %y, %z
   %mul = fmul fast double %div1, %div2
   ret double %mul
 }
@@ -1040,8 +1036,8 @@ define double @fmul_fdivs_factor(double %x, double %y, double %z, double %w) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fdiv reassoc double [[TMP2]], [[Y:%.*]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
-  %div1 = fdiv double %x, %y
-  %div2 = fdiv double %z, %w
+  %div1 = fdiv reassoc double %x, %y
+  %div2 = fdiv reassoc double %z, %w
   %mul = fmul reassoc double %div1, %div2
   ret double %mul
 }
@@ -1052,7 +1048,7 @@ define double @fmul_fdiv_factor(double %x, double %y, double %z) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
-  %div = fdiv double %x, %y
+  %div = fdiv reassoc double %x, %y
   %mul = fmul reassoc double %div, %z
   ret double %mul
 }
@@ -1063,7 +1059,7 @@ define double @fmul_fdiv_factor_constant1(double %x, double %y) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret double [[MUL]]
 ;
-  %div = fdiv double %x, %y
+  %div = fdiv reassoc double %x, %y
   %mul = fmul reassoc double %div, 42.0
   ret double %mul
 }
@@ -1074,7 +1070,7 @@ define <2 x float> @fmul_fdiv_factor_constant2(<2 x float> %x, <2 x float> %y) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fdiv reassoc <2 x float> [[TMP1]], <float 4.200000e+01, float 1.200000e+01>
 ; CHECK-NEXT:    ret <2 x float> [[MUL]]
 ;
-  %div = fdiv <2 x float> %x, <float 42.0, float 12.0>
+  %div = fdiv reassoc <2 x float> %x, <float 42.0, float 12.0>
   %mul = fmul reassoc <2 x float> %div, %y
   ret <2 x float> %mul
 }

>From ab7175639b2d307b9cf14d3da727ecab24c0682b Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor at intel.com>
Date: Mon, 15 Apr 2024 12:08:25 -0700
Subject: [PATCH 3/5] Correct precdiv test

---
 llvm/test/Transforms/InstCombine/fmul.ll | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 5c230425e78a41..71656dd596db90 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -686,10 +686,11 @@ define float @fdiv_constant_numerator_fmul_fast(float %x) {
 
 define float @fdiv_constant_numerator_fmul_precdiv(float %x) {
 ; CHECK-LABEL: @fdiv_constant_numerator_fmul_precdiv(
-; CHECK-NEXT:    [[T4:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = fdiv float 2.000000e+03, [[X:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = fmul reassoc float [[T1]], 6.000000e+03
 ; CHECK-NEXT:    ret float [[T4]]
 ;
-  %t1 = fdiv reassoc float 2.0e+3, %x
+  %t1 = fdiv float 2.0e+3, %x
   %t3 = fmul reassoc float %t1, 6.0e+3
   ret float %t3
 }

>From c4467d281aad1935becc2a6fa534c3ee7b33d7b8 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor at intel.com>
Date: Tue, 16 Apr 2024 12:26:43 -0700
Subject: [PATCH 4/5] Refactor FMF handling

---
 llvm/include/llvm/IR/InstrTypes.h             | 26 ++++++++
 .../InstCombine/InstCombineMulDivRem.cpp      | 63 +++++++++----------
 llvm/test/Transforms/InstCombine/fmul.ll      | 22 +++----
 3 files changed, 65 insertions(+), 46 deletions(-)

diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index cfe1b11ade5a4e..4f7bf8980e1ab3 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -24,6 +24,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/FMF.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
@@ -311,6 +312,31 @@ class BinaryOperator : public Instruction {
     return BO;
   }
 
+  static BinaryOperator *
+  CreateWithFMF(BinaryOps Opc, Value *V1, Value *V2, FastMathFlags FMF,
+                const Twine &Name = "", Instruction *InsertBefore = nullptr) {
+    BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
+    BO->setFastMathFlags(FMF);
+    return BO;
+  }
+
+  static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF,
+                                       const Twine &Name = "") {
+    return CreateWithFMF(Instruction::FAdd, V1, V2, FMF, Name);
+  }
+  static BinaryOperator *CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF,
+                                       const Twine &Name = "") {
+    return CreateWithFMF(Instruction::FSub, V1, V2, FMF, Name);
+  }
+  static BinaryOperator *CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF,
+                                       const Twine &Name = "") {
+    return CreateWithFMF(Instruction::FMul, V1, V2, FMF, Name);
+  }
+  static BinaryOperator *CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF,
+                                       const Twine &Name = "") {
+    return CreateWithFMF(Instruction::FDiv, V1, V2, FMF, Name);
+  }
+
   static BinaryOperator *CreateFAddFMF(Value *V1, Value *V2,
                                        Instruction *FMFSource,
                                        const Twine &Name = "") {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b7bcb839863954..504951f6fc1d85 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -631,48 +631,38 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   Value *Op1 = I.getOperand(1);
   Value *X, *Y;
   Constant *C;
+  BinaryOperator *Op0BinOp;
 
   // Reassociate constant RHS with another constant to form constant
   // expression.
-  if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP()) {
+  if (match(Op1, m_Constant(C)) && C->isFiniteNonZeroFP() &&
+      match(Op0, m_AllowReassoc(m_BinOp(Op0BinOp)))) {
+    // Everything in this scope folds I with Op0, intersecting their FMF.
+    FastMathFlags FMF = I.getFastMathFlags() & Op0BinOp->getFastMathFlags();
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
     Constant *C1;
-    if (match(Op0,
-              m_AllowReassoc(m_OneUse(m_FDiv(m_Constant(C1), m_Value(X)))))) {
+    if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
       // (C1 / X) * C --> (C * C1) / X
       Constant *CC1 =
           ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
-      if (CC1 && CC1->isNormalFP()) {
-        // Preserve only fast-math flags that were set on both of the original
-        // instructions
-        auto *NewDiv = BinaryOperator::CreateFDivFMF(CC1, X, &I);
-        NewDiv->andIRFlags(Op0);
-        return NewDiv;
-      }
+      if (CC1 && CC1->isNormalFP())
+        return BinaryOperator::CreateFDivFMF(CC1, X, FMF);
     }
-    if (match(Op0, m_AllowReassoc(m_FDiv(m_Value(X), m_Constant(C1))))) {
+    if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
       // FIXME: This seems like it should also be checking for arcp
       // (X / C1) * C --> X * (C / C1)
       Constant *CDivC1 =
           ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
-      if (CDivC1 && CDivC1->isNormalFP()) {
-        // Preserve only fast-math flags that were set on both of the original
-        // instructions
-        auto *NewMul = BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
-        NewMul->andIRFlags(Op0);
-        return NewMul;
-      }
+      if (CDivC1 && CDivC1->isNormalFP())
+        return BinaryOperator::CreateFMulFMF(X, CDivC1, FMF);
 
       // If the constant was a denormal, try reassociating differently.
       // (X / C1) * C --> X / (C1 / C)
       Constant *C1DivC =
           ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
-      if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP()) {
-        // Preserve only fast-math flags that were set on both of the original
-        // instructions
-        auto *NewDiv = BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
-        NewDiv->andIRFlags(Op0);
-        return NewDiv;
-      }
+      if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
+        return BinaryOperator::CreateFDivFMF(X, C1DivC, FMF);
     }
 
     // We do not need to match 'fadd C, X' and 'fsub X, C' because they are
@@ -682,16 +672,16 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
       // (X + C1) * C --> (X * C) + (C * C1)
       if (Constant *CC1 =
               ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
-        Value *XC = Builder.CreateFMulFMF(X, C, &I);
-        return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+        Value *XC = Builder.CreateFMul(X, C);
+        return BinaryOperator::CreateFAddFMF(XC, CC1, FMF);
       }
     }
     if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
       // (C1 - X) * C --> (C * C1) - (X * C)
       if (Constant *CC1 =
               ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL)) {
-        Value *XC = Builder.CreateFMulFMF(X, C, &I);
-        return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+        Value *XC = Builder.CreateFMul(X, C);
+        return BinaryOperator::CreateFSubFMF(CC1, XC, FMF);
       }
     }
   }
@@ -700,12 +690,15 @@ Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) {
   if (match(&I,
             m_c_FMul(m_AllowReassoc(m_OneUse(m_FDiv(m_Value(X), m_Value(Y)))),
                      m_Value(Z)))) {
-    // Sink division: (X / Y) * Z --> (X * Z) / Y
-    auto *NewFMul = cast<Instruction>(Builder.CreateFMulFMF(X, Z, &I));
-    NewFMul->andIRFlags(Op0);
-    auto *NewDiv = BinaryOperator::CreateFDivFMF(NewFMul, Y, &I);
-    NewDiv->andIRFlags(Op0);
-    return NewDiv;
+    BinaryOperator *DivOp = cast<BinaryOperator>(((Z == Op0) ? Op1 : Op0));
+    FastMathFlags FMF = I.getFastMathFlags() & DivOp->getFastMathFlags();
+    if (FMF.allowReassoc()) {
+      // Sink division: (X / Y) * Z --> (X * Z) / Y
+      IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+      Builder.setFastMathFlags(FMF);
+      auto *NewFMul = Builder.CreateFMul(X, Z);
+      return BinaryOperator::CreateFDivFMF(NewFMul, Y, FMF);
+    }
   }
 
   // sqrt(X) * sqrt(Y) -> sqrt(X * Y)
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 71656dd596db90..39f9e74f899d18 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -814,7 +814,7 @@ define float @fmul_fadd_distribute(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t2 = fadd float %x, 2.0
+  %t2 = fadd reassoc float %x, 2.0
   %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
@@ -825,7 +825,7 @@ define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], <float 1.200000e+07, float 1.200000e+07>
 ; CHECK-NEXT:    ret <2 x float> [[T3]]
 ;
-  %t1 = fadd <2 x float> <float 2.0e+3, float 2.0e+3>, %x
+  %t1 = fadd reassoc <2 x float> <float 2.0e+3, float 2.0e+3>, %x
   %t3 = fmul reassoc <2 x float> %t1, <float 6.0e+3, float 6.0e+3>
   ret <2 x float> %t3
 }
@@ -836,7 +836,7 @@ define <vscale x 2 x float> @fmul_fadd_distribute_scalablevec(<vscale x 2 x floa
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 1.200000e+07, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x float> [[T3]]
 ;
-  %t1 = fadd <vscale x 2 x float> splat (float 2.0e+3), %x
+  %t1 = fadd reassoc <vscale x 2 x float> splat (float 2.0e+3), %x
   %t3 = fmul reassoc <vscale x 2 x float> %t1, splat (float 6.0e+3)
 
 
@@ -851,7 +851,7 @@ define float @fmul_fsub_distribute1(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t2 = fsub float %x, 2.0
+  %t2 = fsub reassoc float %x, 2.0
   %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
@@ -864,7 +864,7 @@ define float @fmul_fsub_distribute2(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t2 = fsub float 2.0, %x
+  %t2 = fsub reassoc float 2.0, %x
   %t3 = fmul reassoc float %t2, 3.0
   ret float %t3
 }
@@ -878,8 +878,8 @@ define float @fmul_fadd_fmul_distribute(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fmul float %x, 6.0
-  %t2 = fadd float %t1, 2.0
+  %t1 = fmul fast float %x, 6.0
+  %t2 = fadd fast float %t1, 2.0
   %t3 = fmul fast float %t2, 5.0
   ret float %t3
 }
@@ -940,8 +940,8 @@ define float @fmul_fsub_fmul_distribute(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]]
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fmul float %x, 6.0
-  %t2 = fsub float 2.0, %t1
+  %t1 = fmul fast float %x, 6.0
+  %t2 = fsub fast float 2.0, %t1
   %t3 = fmul fast float %t2, 5.0
   ret float %t3
 }
@@ -970,8 +970,8 @@ define float @fmul_fsub_fmul_distribute2(float %x) {
 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01
 ; CHECK-NEXT:    ret float [[T3]]
 ;
-  %t1 = fmul float %x, 6.0
-  %t2 = fsub float %t1, 2.0
+  %t1 = fmul fast float %x, 6.0
+  %t2 = fsub fast float %t1, 2.0
   %t3 = fmul fast float %t2, 5.0
   ret float %t3
 }

>From 7402cfde0650c768e3632801cf00e73044fd5fc1 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <andrew.kaylor at intel.com>
Date: Tue, 16 Apr 2024 14:10:57 -0700
Subject: [PATCH 5/5] Fix formatting

---
 llvm/include/llvm/IR/InstrTypes.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 4f7bf8980e1ab3..8e6bef69218c2b 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -312,9 +312,10 @@ class BinaryOperator : public Instruction {
     return BO;
   }
 
-  static BinaryOperator *
-  CreateWithFMF(BinaryOps Opc, Value *V1, Value *V2, FastMathFlags FMF,
-                const Twine &Name = "", Instruction *InsertBefore = nullptr) {
+  static BinaryOperator *CreateWithFMF(BinaryOps Opc, Value *V1, Value *V2,
+                                       FastMathFlags FMF,
+                                       const Twine &Name = "",
+                                       Instruction *InsertBefore = nullptr) {
     BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
     BO->setFastMathFlags(FMF);
     return BO;