[llvm] [Reland][InstCombine] Fix FMF propagation in `foldSelectIntoOp` (PR #114499)

Thu Oct 31 19:52:27 PDT 2024

https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/114499

>From 163698f0cf23446c19b85fc34e09dabe59437c15 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Thu, 31 Oct 2024 23:26:45 +0800
Subject: [PATCH 1/4] [InstCombine] Fix FMF propagation in `foldSelectIntoOp`
 (#114356)

Closes https://github.com/llvm/llvm-project/issues/113423.
---
 .../InstCombine/InstCombineSelect.cpp          |  5 ++---
 .../InstCombine/fold-select-fmul-if-zero.ll    | 18 +++++++++---------
 .../select-binop-foldable-floating-point.ll    | 16 ++++++++--------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c5f39a4c381ed1..9e193e7faa8ac3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -529,9 +529,6 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     if (!OpToFold)
       return nullptr;
 
-    // TODO: We probably ought to revisit cases where the select and FP
-    // instructions have different flags and add tests to ensure the
-    // behaviour is correct.
     FastMathFlags FMF;
     if (isa<FPMathOperator>(&SI))
       FMF = SI.getFastMathFlags();
@@ -564,6 +561,8 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     BinaryOperator *BO =
         BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
     BO->copyIRFlags(TVI);
+    if (isa<FPMathOperator>(&SI))
+      BO->andIRFlags(NewSel);
     return BO;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
index 1c28b151825c12..caf38c676e20d7 100644
--- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
+++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
@@ -468,7 +468,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -482,7 +482,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul ninf nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -496,7 +496,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -510,7 +510,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -559,7 +559,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x, float %
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -572,7 +572,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -586,7 +586,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_ne
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -599,7 +599,7 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_ne
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
@@ -706,7 +706,7 @@ define float @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x) {
 ; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
 ; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
-; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[X]], [[SCALED_X]]
+; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan float [[X]], [[SCALED_X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
index e5ad312bb85c17..df02eb7bb83ac6 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -48,7 +48,7 @@ define float @select_nnan_fadd_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fadd_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fadd nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -59,7 +59,7 @@ define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fadd_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fadd nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -125,7 +125,7 @@ define float @select_nnan_fmul_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fmul_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fmul nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -136,7 +136,7 @@ define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fmul_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fmul nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -169,7 +169,7 @@ define float @select_nnan_fsub_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fsub_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fsub nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -180,7 +180,7 @@ define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fsub_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fsub nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -247,7 +247,7 @@ define float @select_nnan_fdiv_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fdiv_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
@@ -258,7 +258,7 @@ define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fdiv_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv nnan float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B

>From 20db2503d93db875d982fe44a8e6b1c035bfaa7f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 1 Nov 2024 09:47:40 +0800
Subject: [PATCH 2/4] [Transform] Update tests. NFC.

---
 .../LoopVectorize/ARM/mve-selectandorcost.ll  |  2 +-
 .../AArch64/predicated-reduction.ll           | 38 ++++++++-----------
 2 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index b54d4a97d07be7..e427cfd97c598b 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -46,7 +46,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP9]]
-; CHECK-NEXT:    [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP10]]
+; CHECK-NEXT:    [[PREDPHI]] = fadd <4 x float> [[VEC_PHI]], [[TMP10]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
index 9e9d1e3bab2ce4..04e9afc67fa38b 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -13,7 +13,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK:       [[FOR_BODY_PREHEADER]]:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER20:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
@@ -23,10 +23,8 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI16:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI17:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 8
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
@@ -43,31 +41,27 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
 ; CHECK-NEXT:    [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
-; CHECK-NEXT:    [[TMP14]] = fadd fast <2 x double> [[TMP12]], [[VEC_PHI16]]
-; CHECK-NEXT:    [[TMP15]] = fadd fast <2 x double> [[TMP13]], [[VEC_PHI17]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
 ; CHECK-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
-; CHECK-NEXT:    [[TMP18]] = fadd fast <2 x double> [[TMP16]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP19]] = fadd fast <2 x double> [[TMP17]], [[VEC_PHI15]]
+; CHECK-NEXT:    [[TMP22:%.*]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP16]])
+; CHECK-NEXT:    [[TMP21]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[TMP22]], <2 x double> [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI15]], <2 x double> [[TMP12]])
+; CHECK-NEXT:    [[TMP19]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[TMP18]], <2 x double> [[TMP13]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x double> [[TMP19]], [[TMP18]]
-; CHECK-NEXT:    [[TMP21:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]])
-; CHECK-NEXT:    [[BIN_RDX21:%.*]] = fadd fast <2 x double> [[TMP15]], [[TMP14]]
-; CHECK-NEXT:    [[TMP22:%.*]] = tail call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX21]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
-; CHECK:       [[FOR_BODY_PREHEADER23]]:
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER20]]
+; CHECK:       [[FOR_BODY_PREHEADER20]]:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP19]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
-; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
-; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER20]] ]
+; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER20]] ]
+; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER20]] ]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
@@ -76,14 +70,14 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
 ; CHECK-NEXT:    [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
 ; CHECK-NEXT:    [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
-; CHECK-NEXT:    [[V0_2]] = fadd fast double [[ADD8]], [[V0_011]]
+; CHECK-NEXT:    [[V0_2]] = fadd double [[V0_011]], [[ADD8]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
-; CHECK-NEXT:    [[V1_2]] = fadd fast double [[ADD4]], [[V1_012]]
+; CHECK-NEXT:    [[V1_2]] = fadd double [[V1_012]], [[ADD4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[FOR_END_LOOPEXIT]]:
-; CHECK-NEXT:    [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[V0_1:%.*]] = phi double [ [[TMP19]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
 ; CHECK-NEXT:    br label %[[FOR_END]]

>From e898145c17ab2aecf37fd824cc31a1b2f0e690a6 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 1 Nov 2024 10:33:32 +0800
Subject: [PATCH 3/4] [InstCombine] Keep non poison generating FMF flags

---
 .../InstCombine/InstCombineSelect.cpp         |  8 +++-
 .../select-binop-foldable-floating-point.ll   | 16 ++++----
 .../LoopVectorize/ARM/mve-selectandorcost.ll  |  2 +-
 .../AArch64/predicated-reduction.ll           | 38 +++++++++++--------
 4 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9e193e7faa8ac3..68ac11a5fb51a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -561,8 +561,12 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     BinaryOperator *BO =
         BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
     BO->copyIRFlags(TVI);
-    if (isa<FPMathOperator>(&SI))
-      BO->andIRFlags(NewSel);
+    if (isa<FPMathOperator>(&SI)) {
+      // Merge poison generating flags from the select
+      BO->setHasNoNaNs(BO->hasNoNaNs() && FMF.noNaNs());
+      BO->setHasNoInfs(BO->hasNoInfs() && FMF.noInfs());
+      BO->setHasNoSignedZeros(BO->hasNoSignedZeros() && FMF.noSignedZeros());
+    }
     return BO;
   };
 
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
index df02eb7bb83ac6..253bc9e784c2f0 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -48,7 +48,7 @@ define float @select_nnan_fadd_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fadd_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fadd nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fadd reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -59,7 +59,7 @@ define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fadd_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fadd nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fadd reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
@@ -125,7 +125,7 @@ define float @select_nnan_fmul_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fmul_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fmul nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fmul reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -136,7 +136,7 @@ define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fmul_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fmul nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fmul reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
@@ -169,7 +169,7 @@ define float @select_nnan_fsub_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fsub_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fsub nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fsub reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -180,7 +180,7 @@ define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fsub_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fsub nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fsub reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
@@ -247,7 +247,7 @@ define float @select_nnan_fdiv_swapped(i1 %cond, float %A, float %B) {
 define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fdiv_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
-; CHECK-NEXT:    [[D:%.*]] = fdiv nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
@@ -258,7 +258,7 @@ define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) {
 define float @select_nnan_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
 ; CHECK-LABEL: @select_nnan_fdiv_swapped_fast_math(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fdiv nnan float [[A:%.*]], [[C]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv reassoc nnan arcp contract afn float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index e427cfd97c598b..b0cccf4d0a7bfa 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -46,7 +46,7 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
 ; CHECK-NEXT:    [[TMP8:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP7]])
 ; CHECK-NEXT:    [[TMP9:%.*]] = fdiv fast <4 x float> [[TMP8]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[DOTNOT9]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP9]]
-; CHECK-NEXT:    [[PREDPHI]] = fadd <4 x float> [[VEC_PHI]], [[TMP10]]
+; CHECK-NEXT:    [[PREDPHI]] = fadd reassoc arcp contract afn <4 x float> [[VEC_PHI]], [[TMP10]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
index 04e9afc67fa38b..a10e3267a0f56c 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -13,7 +13,7 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK:       [[FOR_BODY_PREHEADER]]:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[RAND_BLOCK_LENGTH]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER20:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER23:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0
@@ -23,8 +23,10 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi <2 x double> [ <double -0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI16:%.*]] = phi <2 x double> [ <double 0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI17:%.*]] = phi <2 x double> [ <double -0.000000e+00, double -0.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX]], i64 8
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 4
@@ -41,27 +43,31 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x double> [[TMP7]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP6]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
 ; CHECK-NEXT:    [[TMP13:%.*]] = tail call fast <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP7]], <2 x double> <double -0.000000e+00, double -0.000000e+00>)
+; CHECK-NEXT:    [[TMP14]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI16]], [[TMP12]]
+; CHECK-NEXT:    [[TMP15]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI17]], [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP8]], <2 x double> [[TMP10]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
 ; CHECK-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x double> [[TMP11]], <2 x double> <double -0.000000e+00, double -0.000000e+00>
-; CHECK-NEXT:    [[TMP22:%.*]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> [[TMP16]])
-; CHECK-NEXT:    [[TMP21]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[TMP22]], <2 x double> [[TMP17]])
-; CHECK-NEXT:    [[TMP18:%.*]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI15]], <2 x double> [[TMP12]])
-; CHECK-NEXT:    [[TMP19]] = tail call double @llvm.vector.reduce.fadd.v2f64(double [[TMP18]], <2 x double> [[TMP13]])
+; CHECK-NEXT:    [[TMP18]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI]], [[TMP16]]
+; CHECK-NEXT:    [[TMP19]] = fadd reassoc arcp contract afn <2 x double> [[VEC_PHI15]], [[TMP17]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd reassoc arcp contract afn <2 x double> [[TMP19]], [[TMP18]]
+; CHECK-NEXT:    [[TMP21:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[BIN_RDX]])
+; CHECK-NEXT:    [[BIN_RDX21:%.*]] = fadd reassoc arcp contract afn <2 x double> [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    [[TMP22:%.*]] = tail call reassoc arcp contract afn double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[BIN_RDX21]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER20]]
-; CHECK:       [[FOR_BODY_PREHEADER20]]:
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY_PREHEADER23]]
+; CHECK:       [[FOR_BODY_PREHEADER23]]:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[V1_012_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP21]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP19]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[V0_011_PH:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER20]] ]
-; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER20]] ]
-; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER20]] ]
+; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT:    [[V1_012:%.*]] = phi double [ [[V1_2:%.*]], %[[FOR_BODY]] ], [ [[V1_012_PH]], %[[FOR_BODY_PREHEADER23]] ]
+; CHECK-NEXT:    [[V0_011:%.*]] = phi double [ [[V0_2:%.*]], %[[FOR_BODY]] ], [ [[V0_011_PH]], %[[FOR_BODY_PREHEADER23]] ]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV1]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
@@ -70,14 +76,14 @@ define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef
 ; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
 ; CHECK-NEXT:    [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
 ; CHECK-NEXT:    [[ADD8:%.*]] = tail call fast double @llvm.maxnum.f64(double [[SUB]], double -0.000000e+00)
-; CHECK-NEXT:    [[V0_2]] = fadd double [[V0_011]], [[ADD8]]
+; CHECK-NEXT:    [[V0_2]] = fadd reassoc arcp contract afn double [[V0_011]], [[ADD8]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = select i1 [[CMP1]], double [[MUL3]], double -0.000000e+00
-; CHECK-NEXT:    [[V1_2]] = fadd double [[V1_012]], [[ADD4]]
+; CHECK-NEXT:    [[V1_2]] = fadd reassoc arcp contract afn double [[V1_012]], [[ADD4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[FOR_END_LOOPEXIT]]:
-; CHECK-NEXT:    [[V0_1:%.*]] = phi double [ [[TMP19]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[V0_1:%.*]] = phi double [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ [[V0_2]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[V1_1:%.*]] = phi double [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ [[V1_2]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
 ; CHECK-NEXT:    br label %[[FOR_END]]

>From 9b86c6bf1dc9fd71134646353b9953a658a406c5 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Fri, 1 Nov 2024 10:51:56 +0800
Subject: [PATCH 4/4] [InstCombine] Adjust comments. NFC.

---
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 68ac11a5fb51a4..162ebebf1b8f97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -562,9 +562,11 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
         BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
     BO->copyIRFlags(TVI);
     if (isa<FPMathOperator>(&SI)) {
-      // Merge poison generating flags from the select
+      // Merge poison generating flags from the select.
       BO->setHasNoNaNs(BO->hasNoNaNs() && FMF.noNaNs());
       BO->setHasNoInfs(BO->hasNoInfs() && FMF.noInfs());
+      // Merge no-signed-zeros flag from the select.
+      // Otherwise we may produce zeros with different sign.
       BO->setHasNoSignedZeros(BO->hasNoSignedZeros() && FMF.noSignedZeros());
     }
     return BO;