[llvm] [InstCombine] Fix for folding `select` into floating point binary operators. (PR #83200)

Tue Feb 27 17:31:54 PST 2024

https://github.com/michele-scandale updated https://github.com/llvm/llvm-project/pull/83200

>From 87eb1f630a4a0636bebac9acfa8921848138cc69 Mon Sep 17 00:00:00 2001
From: Michele Scandale <michele.scandale at gmail.com>
Date: Tue, 27 Feb 2024 14:32:00 -0800
Subject: [PATCH 1/2] [InstCombine] Fix for folding `select` into floating
 point binary operators.

Folding a `select` into a floating point binary operators can only be
done if the result is preserved for both case. In particular, if the
other operand of the `select` can be a NaN, then the transformation
won't preserve the result value.
---
 .../InstCombine/InstCombineSelect.cpp         |   5 +-
 .../InstCombine/fold-select-fmul-if-zero.ll   |  56 ++---
 .../select-binop-foldable-floating-point.ll   | 196 ++++++++++--------
 .../Transforms/InstCombine/select_meta.ll     |   8 +-
 .../LoopVectorize/ARM/mve-selectandorcost.ll  |   4 +-
 .../LoopVectorize/reduction-inloop-cond.ll    |   8 +-
 .../LoopVectorize/reduction-inloop-pred.ll    |  58 +++---
 .../LoopVectorize/reduction-inloop.ll         |  14 +-
 .../Transforms/LoopVectorize/reduction.ll     |  14 +-
 ...ting-sinking-required-for-vectorization.ll |   8 +-
 10 files changed, 198 insertions(+), 173 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 71fa9b9ba41ebb..298cd157ced141 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -527,8 +527,11 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     // instructions have different flags and add tests to ensure the
     // behaviour is correct.
     FastMathFlags FMF;
-    if (isa<FPMathOperator>(&SI))
+    if (isa<FPMathOperator>(&SI)) {
       FMF = SI.getFastMathFlags();
+      if (!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN())
+        return nullptr;
+    }
     Constant *C = ConstantExpr::getBinOpIdentity(
         TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
     Value *OOp = TVI->getOperand(2 - OpToFold);
diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
index a7adcd1ac61e7f..dedd12f8cc7a3d 100644
--- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
+++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
@@ -427,13 +427,13 @@ define float @fmul_by_snan_if_0_oeq_zero_f32(float %x) {
 define float @fmul_by_var_if_0_oeq_zero_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -441,14 +441,14 @@ define float @fmul_by_fabs_var_if_0_oeq_zero_f32(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_fabs_var_if_0_oeq_zero_f32(
 ; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y_FABS]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y_FABS]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %y.fabs = call float @llvm.fabs.f32(float %y)
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul float %x, %y.fabs
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -467,13 +467,13 @@ define float @fmul_by_fabs_nnan_ninf_var_if_0_oeq_zero_f32(float %x, float %y) {
 define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nsz float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -481,13 +481,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_fmul(float %x, float %y) {
 define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul ninf nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nsz ninf float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -495,13 +495,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_ninf_fmul(float %x, float %y) {
 define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nsz nnan float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -509,13 +509,13 @@ define float @fmul_by_var_if_0_oeq_zero_f32_nsz_nnan_fmul(float %x, float %y) {
 define float @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_nnan_ninf_fmul(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -558,26 +558,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_nsz_inverted(f
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf nsz float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x, float %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf nsz float %y, %x
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -585,26 +585,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz_commuted(float %x
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(float %x, float nofpclass(nzero) %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(float %x, float nofpclass(nzero nsub) %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_nnan_ninf_select_known_never_negzero_negsub(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -622,26 +622,26 @@ define float @fmul_by_var_if_0_oeq_zero_f32_known_never_nan_inf_select_nsz(float
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero(float %x, float nofpclass(nan inf nzero) %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
 define float @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero_nsub(float %x, float nofpclass(nan inf nzero nsub) %y) {
 ; CHECK-LABEL: @fmul_by_var_if_0_oeq_zero_f32_fmul_known_never_nan_inf_negzero_nsub(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[Y:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul float %x, %y
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
@@ -692,26 +692,26 @@ define float @fmul_by_var_if_not_one_0_zero_f32_assume_finite_fmul_nsz(float %x,
 define float @fmul_by_self_if_0_oeq_zero_f32(float %x) {
 ; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul float %x, %x
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
 define float @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(float %x) {
 ; CHECK-LABEL: @fmul_by_self_if_0_oeq_zero_f32_fmul_nnan_ninf_nsz(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT:    [[SCALED_X:%.*]] = select i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
+; CHECK-NEXT:    [[SCALED_X:%.*]] = select nnan i1 [[X_IS_ZERO]], float [[X]], float 1.000000e+00
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = fmul nnan ninf nsz float [[SCALED_X]], [[X]]
 ; CHECK-NEXT:    ret float [[SCALED_IF_DENORMAL]]
 ;
   %x.is.zero = fcmp oeq float %x, 0.0
   %scaled.x = fmul nnan ninf nsz float %x, %x
-  %scaled.if.denormal = select i1 %x.is.zero, float %scaled.x, float %x
+  %scaled.if.denormal = select nnan i1 %x.is.zero, float %scaled.x, float %x
   ret float %scaled.if.denormal
 }
 
diff --git a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
index 496854c7d731ab..77ff16a8b2e3d8 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-foldable-floating-point.ll
@@ -1,8 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
-define float @select_fadd(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd(
+define float @select_maybe_nan_fadd(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_maybe_nan_fadd(
+; CHECK-NEXT:    [[C:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    ret float [[D]]
+;
+  %C = fadd float %A, %B
+  %D = select i1 %cond, float %C, float %A
+  ret float %D
+}
+
+define float @select_fpclass_fadd(i1 %cond, float nofpclass(nan) %A, float %B) {
+; CHECK-LABEL: @select_fpclass_fadd(
 ; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
@@ -12,41 +23,52 @@ define float @select_fadd(i1 %cond, float %A, float %B) {
   ret float %D
 }
 
-define float @select_fadd_swapped(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_swapped(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fadd(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fadd float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fadd_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
+define float @select_nnan_fadd_swapped(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_swapped(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fadd float [[C]], [[A:%.*]]
+; CHECK-NEXT:    ret float [[D]]
+;
+  %C = fadd float %A, %B
+  %D = select nnan i1 %cond, float %A, float %C
+  ret float %D
+}
+
+define float @select_nnan_fadd_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float -0.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fadd_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fadd_swapped_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fadd_swapped_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float -0.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fadd fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fadd fast float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define <4 x float> @select_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_v4f32(
+define <4 x float> @select_nnan_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_v4f32(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan nsz <4 x i1> [[COND:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[D:%.*]] = fadd nnan nsz <4 x float> [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret <4 x float> [[D]]
@@ -56,202 +78,202 @@ define <4 x float> @select_nsz_fadd_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x f
   ret <4 x float> %D
 }
 
-define <vscale x 4 x float> @select_nsz_fadd_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_nxv4f32(
+define <vscale x 4 x float> @select_nnan_nsz_fadd_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_nxv4f32(
 ; CHECK-NEXT:    [[C:%.*]] = select nnan nsz <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> zeroinitializer
 ; CHECK-NEXT:    [[D:%.*]] = fadd nnan nsz <vscale x 4 x float> [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[D]]
 ;
-  %C = fadd nsz nnan <vscale x 4 x float> %A, %B
-  %D = select nsz nnan <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
+  %C = fadd nnan nsz <vscale x 4 x float> %A, %B
+  %D = select nnan nsz <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
   ret <vscale x 4 x float> %D
 }
 
-define <vscale x 4 x float> @select_nsz_fadd_nxv4f32_swapops(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fadd_nxv4f32_swapops(
+define <vscale x 4 x float> @select_nnan_nsz_fadd_nxv4f32_swapops(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fadd_nxv4f32_swapops(
 ; CHECK-NEXT:    [[C:%.*]] = select fast <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fadd fast <vscale x 4 x float> [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[D]]
 ;
   %C = fadd fast <vscale x 4 x float> %A, %B
-  %D = select fast <vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %C
+  %D = select nnan fast <vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %C
   ret <vscale x 4 x float> %D
 }
 
-define float @select_fmul(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+define float @select_nnan_fmul(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fmul(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fmul_swapped(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fmul_swapped(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+define float @select_nnan_fmul_swapped(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fmul_swapped(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fmul float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define float @select_fmul_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fmul_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+define float @select_nnan_fmul_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fmul_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fmul_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+define float @select_nnan_fmul_swapped_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fmul_swapped_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fmul fast float [[C]], [[A:%.*]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fmul fast float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define float @select_fsub(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fsub(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+define float @select_nnan_fsub(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fsub(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fsub_swapped(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fsub_swapped(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fsub_swapped(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fsub_swapped(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fsub float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define float @select_fsub_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fsub_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
+define float @select_nnan_fsub_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fsub_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 0.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fsub_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
+define float @select_nnan_fsub_swapped_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fsub_swapped_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 0.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fsub fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub fast float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define <4 x float> @select_nsz_fsub_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fsub_v4f32(
-; CHECK-NEXT:    [[C:%.*]] = select nsz <4 x i1> [[COND:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer
+define <4 x float> @select_nnan_nsz_fsub_v4f32(<4 x i1> %cond, <4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fsub_v4f32(
+; CHECK-NEXT:    [[C:%.*]] = select nnan nsz <4 x i1> [[COND:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[D:%.*]] = fsub <4 x float> [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret <4 x float> [[D]]
 ;
   %C = fsub <4 x float> %A, %B
-  %D = select nsz <4 x i1> %cond, <4 x float> %C, <4 x float> %A
+  %D = select nnan nsz <4 x i1> %cond, <4 x float> %C, <4 x float> %A
   ret <4 x float> %D
 }
 
-define <vscale x 4 x float> @select_nsz_fsub_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
-; CHECK-LABEL: @select_nsz_fsub_nxv4f32(
-; CHECK-NEXT:    [[C:%.*]] = select nsz <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> zeroinitializer
+define <vscale x 4 x float> @select_nnan_nsz_fsub_nxv4f32(<vscale x 4 x i1> %cond, <vscale x 4 x float> %A, <vscale x 4 x float> %B) {
+; CHECK-LABEL: @select_nnan_nsz_fsub_nxv4f32(
+; CHECK-NEXT:    [[C:%.*]] = select nnan nsz <vscale x 4 x i1> [[COND:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> zeroinitializer
 ; CHECK-NEXT:    [[D:%.*]] = fsub <vscale x 4 x float> [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[D]]
 ;
   %C = fsub <vscale x 4 x float> %A, %B
-  %D = select nsz <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
+  %D = select nnan nsz <vscale x 4 x i1> %cond, <vscale x 4 x float> %C, <vscale x 4 x float> %A
   ret <vscale x 4 x float> %D
 }
 
 ; 'fsub' can only fold on the amount subtracted.
-define float @select_fsub_invalid(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fsub_invalid(
+define float @select_nnan_fsub_invalid(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fsub_invalid(
 ; CHECK-NEXT:    [[C:%.*]] = fsub float [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[D:%.*]] = select nnan i1 [[COND:%.*]], float [[C]], float [[A]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fsub float %B, %A
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fdiv(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+define float @select_nnan_fdiv(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fdiv(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fdiv_swapped(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fdiv_swapped(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+define float @select_nnan_fdiv_swapped(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fdiv_swapped(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fdiv float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
-define float @select_fdiv_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fdiv_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
+define float @select_nnan_fdiv_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fdiv_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float [[B:%.*]], float 1.000000e+00
 ; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
 
-define float @select_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fdiv_swapped_fast_math(
-; CHECK-NEXT:    [[C:%.*]] = select i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
+define float @select_nnan_fdiv_swapped_fast_math(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fdiv_swapped_fast_math(
+; CHECK-NEXT:    [[C:%.*]] = select nnan i1 [[COND:%.*]], float 1.000000e+00, float [[B:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = fdiv fast float [[A:%.*]], [[C]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv fast float %A, %B
-  %D = select i1 %cond, float %A, float %C
+  %D = select nnan i1 %cond, float %A, float %C
   ret float %D
 }
 
 ; 'fdiv' can only fold on the divisor amount.
-define float @select_fdiv_invalid(i1 %cond, float %A, float %B) {
-; CHECK-LABEL: @select_fdiv_invalid(
+define float @select_nnan_fdiv_invalid(i1 %cond, float %A, float %B) {
+; CHECK-LABEL: @select_nnan_fdiv_invalid(
 ; CHECK-NEXT:    [[C:%.*]] = fdiv float [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = select i1 [[COND:%.*]], float [[C]], float [[A]]
+; CHECK-NEXT:    [[D:%.*]] = select nnan i1 [[COND:%.*]], float [[C]], float [[A]]
 ; CHECK-NEXT:    ret float [[D]]
 ;
   %C = fdiv float %B, %A
-  %D = select i1 %cond, float %C, float %A
+  %D = select nnan i1 %cond, float %C, float %A
   ret float %D
 }
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
index cd133101736cfc..aa794e82e0fdc6 100644
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -360,23 +360,23 @@ define i128 @select_ashr(i1 %cond, i128 %x, i128 %y) {
 
 define double @select_fmul(i1 %cond, double %x, double %y) {
 ; CHECK-LABEL: @select_fmul(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable [[META2]]
+; CHECK-NEXT:    [[OP:%.*]] = select nnan i1 [[COND:%.*]], double [[Y:%.*]], double 1.000000e+00, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fmul double [[OP]], [[X:%.*]]
 ; CHECK-NEXT:    ret double [[RET]]
 ;
   %op = fmul double %x, %y
-  %ret = select i1 %cond, double %op, double %x, !prof !1, !unpredictable !3
+  %ret = select nnan i1 %cond, double %op, double %x, !prof !1, !unpredictable !3
   ret double %ret
 }
 
 define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[OP:%.*]] = select i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
+; CHECK-NEXT:    [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %op = fdiv <2 x float> %x, %y
-  %ret = select i1 %cond, <2 x float> %op, <2 x float> %x, !prof !1, !unpredictable !3
+  %ret = select nnan i1 %cond, <2 x float> %op, <2 x float> %x, !prof !1, !unpredictable !3
   ret <2 x float> %ret
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
index 938e1deab0b6c9..69538343356693 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-selectandorcost.ll
@@ -45,8 +45,8 @@ define float @test(ptr nocapture readonly %pA, ptr nocapture readonly %pB, i32 %
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD6]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP9]])
 ; CHECK-NEXT:    [[TMP11:%.*]] = fdiv fast <4 x float> [[TMP10]], [[TMP8]]
-; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[DOTNOT8]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[TMP11]]
-; CHECK-NEXT:    [[PREDPHI]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP12]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP11]], [[VEC_PHI]]
+; CHECK-NEXT:    [[PREDPHI]] = select <4 x i1> [[DOTNOT8]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP12]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 3ba57821bc31b0..873f6364f82811 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -850,8 +850,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP24]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP25]]
+; CHECK-NEXT:    [[TMP25:%.*]] = fadd fast <4 x float> [[TMP24]], [[VEC_PHI]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP25]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
@@ -889,8 +889,8 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; CHECK:       pred.load.continue14:
 ; CHECK-NEXT:    [[TMP46:%.*]] = phi <4 x float> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ]
-; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP26]], <4 x float> [[TMP46]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
-; CHECK-NEXT:    [[PREDPHI15]] = fadd fast <4 x float> [[PREDPHI]], [[TMP47]]
+; CHECK-NEXT:    [[TMP47:%.*]] = fadd fast <4 x float> [[TMP46]], [[PREDPHI]]
+; CHECK-NEXT:    [[PREDPHI15]] = select <4 x i1> [[TMP26]], <4 x float> [[TMP47]], <4 x float> [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index b1c5ccbead64e5..e79f0983a63785 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -65,7 +65,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
 ; CHECK:       .lr.ph:
-; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       ._crit_edge:
 ; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
@@ -1173,7 +1173,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1216,11 +1216,11 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
-; CHECK-NEXT:    [[TMP26]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
+; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
-; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -1228,7 +1228,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
 ;
 entry:
@@ -1260,7 +1260,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
@@ -1303,11 +1303,11 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
-; CHECK-NEXT:    [[TMP26]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
+; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
-; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -1315,7 +1315,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
 ;
 entry:
@@ -1351,25 +1351,25 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP11]]
-; CHECK-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP13]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[PREDPHI_V]]
-; CHECK-NEXT:    [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
+; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP9]]
+; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]])
+; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]])
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -1386,7 +1386,7 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret float [[SUM_1_LCSSA]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index d85241167d0cd8..e8d57c935144a1 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -690,16 +690,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP9]]
-; CHECK-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP11]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[PREDPHI_V]]
-; CHECK-NEXT:    [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]]
+; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index ba82bac6fad266..a47a38510eeeb3 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -761,16 +761,16 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP9]]
-; CHECK-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP11]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[PREDPHI_V]]
-; CHECK-NEXT:    [[PREDPHI3]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI2]]
+; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index 33cfd0aa8574e0..ad100c399c08ed 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -164,10 +164,10 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META11:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
-; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP2]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD10]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]]
-; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <4 x float> [[WIDE_LOAD11]]
-; CHECK-NEXT:    [[PREDPHI12:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x float> [[TMP6]], [[WIDE_LOAD10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
+; CHECK-NEXT:    [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]

>From a45bbd2abe2f809c52c5adfc7c1fb21de8f9deea Mon Sep 17 00:00:00 2001
From: Michele Scandale <michele.scandale at gmail.com>
Date: Tue, 27 Feb 2024 17:30:57 -0800
Subject: [PATCH 2/2] Delay NaN check.

---
 .../InstCombine/InstCombineSelect.cpp         | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 298cd157ced141..d603a6360ad3b2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -527,11 +527,8 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     // instructions have different flags and add tests to ensure the
     // behaviour is correct.
     FastMathFlags FMF;
-    if (isa<FPMathOperator>(&SI)) {
+    if (isa<FPMathOperator>(&SI))
       FMF = SI.getFastMathFlags();
-      if (!computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN())
-        return nullptr;
-    }
     Constant *C = ConstantExpr::getBinOpIdentity(
         TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
     Value *OOp = TVI->getOperand(2 - OpToFold);
@@ -539,19 +536,24 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     // between 0, 1 and -1.
     const APInt *OOpC;
     bool OOpIsAPInt = match(OOp, m_APInt(OOpC));
-    if (!isa<Constant>(OOp) ||
-        (OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
-      Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
-                                           Swapped ? OOp : C, "", &SI);
-      if (isa<FPMathOperator>(&SI))
-        cast<Instruction>(NewSel)->setFastMathFlags(FMF);
-      NewSel->takeName(TVI);
-      BinaryOperator *BO =
-          BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
-      BO->copyIRFlags(TVI);
-      return BO;
-    }
-    return nullptr;
+    if (isa<Constant>(OOp) &&
+        (!OOpIsAPInt || !isSelect01(C->getUniqueInteger(), *OOpC)))
+      return nullptr;
+
+    // Cannot fold if the false value might be a NaN.
+    if (isa<FPMathOperator>(&SI) &&
+        !computeKnownFPClass(FalseVal, FMF, fcNan, &SI).isKnownNeverNaN())
+      return nullptr;
+
+    Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
+                                         Swapped ? OOp : C, "", &SI);
+    if (isa<FPMathOperator>(&SI))
+      cast<Instruction>(NewSel)->setFastMathFlags(FMF);
+    NewSel->takeName(TVI);
+    BinaryOperator *BO =
+        BinaryOperator::Create(TVI->getOpcode(), FalseVal, NewSel);
+    BO->copyIRFlags(TVI);
+    return BO;
   };
 
   if (Instruction *R = TryFoldSelectIntoOp(SI, TrueVal, FalseVal, false))